library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1.9000 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## Loading required package: robustbase
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
tidyverse,
fixest,
glue,
ggrepel,
splines,
patchwork,
lme4
)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
# read an OWID CSV, extract the value column(s), return entity/code/year/value
read_owid <- function(path, var_name = NULL) {
if (is.null(var_name)) var_name <- tools::file_path_sans_ext(basename(path))
d <- read_csv(path, show_col_types = FALSE)
# drop annotation and region columns
d <- d %>% select(-matches("annotation|owid_region"))
# the value column is whatever isn't entity/code/year
value_cols <- setdiff(names(d), c("entity", "code", "year"))
if (length(value_cols) == 1) {
d <- d %>% rename(!!var_name := all_of(value_cols))
} else {
# multiple value cols: take first numeric one
for (vc in value_cols) {
if (is.numeric(d[[vc]])) {
d <- d %>% select(entity, code, year, !!var_name := all_of(vc))
break
}
}
}
d %>% select(entity, code, year, all_of(var_name))
}
# read OECD SDMX-format CSV, aggregate quarterly to annual
read_oecd_housing <- function(path) {
d <- read_csv(path, show_col_types = FALSE)
# price-to-income ratio
d_yieldh <- d %>%
filter(MEASURE == "HPI_YDH", FREQ == "Q") %>%
mutate(year = as.integer(str_extract(TIME_PERIOD, "^\\d{4}")), code = REF_AREA) %>%
group_by(code, year) %>%
summarise(housing_price_income = mean(OBS_VALUE, na.rm = TRUE), .groups = "drop")
# real house prices (broader coverage, includes Turkey)
d_rhp <- d %>%
filter(MEASURE == "RHP", FREQ == "Q") %>%
mutate(year = as.integer(str_extract(TIME_PERIOD, "^\\d{4}")), code = REF_AREA) %>%
group_by(code, year) %>%
summarise(real_house_price = mean(OBS_VALUE, na.rm = TRUE), .groups = "drop")
full_join(d_yieldh, d_rhp, by = c("code", "year"))
}
# standardize by cross-sectional SD in reference year (2000)
# ref_data: optionally use a different dataset for the SD (e.g., world sample for OECD)
standardize_cs_sd <- function(data, varname, ref_year = 2000, ref_data = NULL) {
if (is.null(ref_data)) ref_data <- data
sd_ref <- sd(ref_data[[varname]][ref_data$year == ref_year], na.rm = TRUE)
if (is.na(sd_ref) || sd_ref == 0) return(rep(NA_real_, nrow(data)))
data[[varname]] / sd_ref
}
# run bivariate FE regressions, both raw and standardized
# time_control: "none" = country FE only, "linear" = country FE + linear year, "fe" = country + year FE
# ref_data: dataset to use for computing the standardization SD
run_biv_fe <- function(data, label = "", ref_data = NULL, time_control = "fe") {
predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()
map_df(predictors, function(xvar) {
d_tmp <- data %>%
mutate(x_std = standardize_cs_sd(., xvar, ref_data = ref_data)) %>%
select(tfr, code, year, x = all_of(xvar), x_std) %>%
drop_na()
if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) {
return(tibble(variable = xvar, sample = label, n = nrow(d_tmp),
n_countries = n_distinct(d_tmp$code),
coef_raw = NA_real_, se_raw = NA_real_,
coef_std = NA_real_, se_std = NA_real_, p = NA_real_))
}
fml_raw <- switch(time_control,
none = tfr ~ x | code,
linear = tfr ~ x + year | code,
fe = tfr ~ x | code + year
)
fml_std <- switch(time_control,
none = tfr ~ x_std | code,
linear = tfr ~ x_std + year | code,
fe = tfr ~ x_std | code + year
)
fit_raw <- feols(fml_raw, data = d_tmp)
fit_std <- feols(fml_std, data = d_tmp)
s_raw <- summary(fit_raw)
s_std <- summary(fit_std)
tibble(
variable = xvar, sample = label,
n = nrow(d_tmp), n_countries = n_distinct(d_tmp$code),
coef_raw = coef(fit_raw)["x"], se_raw = s_raw$se["x"],
coef_std = coef(fit_std)["x_std"], se_std = s_std$se["x_std"],
p = s_raw$coeftable["x", "Pr(>|t|)"]
)
})
}
# OECD member country codes (excluding middle-income members: COL, CRI, MEX, CHL, TUR)
oecd_codes <- c(
"AUS", "AUT", "BEL", "CAN", "CZE", "DNK", "EST",
"FIN", "FRA", "DEU", "GRC", "HUN", "ISL", "IRL", "ISR", "ITA", "JPN",
"KOR", "LVA", "LTU", "LUX", "NLD", "NZL", "NOR", "POL", "PRT",
"SVK", "SVN", "ESP", "SWE", "CHE", "GBR", "USA"
)
# read all OWID CSVs from data/owid_raw/
raw_dir <- "data/owid_raw"
csv_files <- list.files(raw_dir, pattern = "\\.csv$", full.names = TRUE)
owid_list <- map(csv_files, read_owid)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
names(owid_list) <- tools::file_path_sans_ext(basename(csv_files))
map_df(owid_list, ~tibble(rows = nrow(.x), cols = ncol(.x)), .id = "indicator")
# read OECD housing data (price-to-income + real house prices)
d_housing <- read_oecd_housing("data/OECD.ECO.MPD,DSD_AN_HOUSE_PRICES@DF_HOUSE_PRICES,1.0+all.csv")
# drop variables with known scaling/interpretation issues
owid_list[["pop_density"]] <- NULL
# join all indicators into one panel
d <- owid_list %>%
reduce(full_join, by = c("entity", "code", "year")) %>%
left_join(d_housing, by = c("code", "year"))
# filter to 1960+ (pre-1960 data is sparse and includes dodgy historical estimates)
d <- d %>% filter(year >= 1960)
# two samples
d_all <- d %>% filter(!is.na(tfr))
d_oecd <- d %>% filter(code %in% oecd_codes, !is.na(tfr))
cat("All countries:", nrow(d_all), "rows,", n_distinct(d_all$code), "countries\n")
## All countries: 16576 rows, 255 countries
cat("OECD only:", nrow(d_oecd), "rows,", n_distinct(d_oecd$code), "countries\n")
## OECD only: 2112 rows, 33 countries
# coverage per variable in each sample
bind_rows(
d_all %>% select(-entity, -code, -year) %>%
summarise(across(everything(), ~mean(!is.na(.x)))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "coverage") %>%
mutate(sample = "all"),
d_oecd %>% select(-entity, -code, -year) %>%
summarise(across(everything(), ~mean(!is.na(.x)))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "coverage") %>%
mutate(sample = "oecd")
) %>%
pivot_wider(names_from = sample, values_from = coverage) %>%
arrange(desc(all))
6 specs: {level TFR, log TFR} × {no year control, linear year, year FE}, both samples standardized to world yr-2000 SD.
d_all_log <- d_all %>% mutate(tfr = log(tfr))
d_oecd_log <- d_oecd %>% mutate(tfr = log(tfr))
# run all 6 specs × 2 samples = 12 variants
run_all_specs <- function(d_level, d_log, sample_label, ref_data) {
bind_rows(
run_biv_fe(d_level, sample_label, ref_data, time_control = "none") %>% mutate(tfr_coding = "level", time_ctrl = "none"),
run_biv_fe(d_level, sample_label, ref_data, time_control = "linear") %>% mutate(tfr_coding = "level", time_ctrl = "linear"),
run_biv_fe(d_level, sample_label, ref_data, time_control = "fe") %>% mutate(tfr_coding = "level", time_ctrl = "year FE"),
run_biv_fe(d_log, sample_label, ref_data, time_control = "none") %>% mutate(tfr_coding = "log", time_ctrl = "none"),
run_biv_fe(d_log, sample_label, ref_data, time_control = "linear") %>% mutate(tfr_coding = "log", time_ctrl = "linear"),
run_biv_fe(d_log, sample_label, ref_data, time_control = "fe") %>% mutate(tfr_coding = "log", time_ctrl = "year FE")
)
}
biv_specs_all <- run_all_specs(d_all, d_all_log, "All countries", d_all)
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
biv_specs_oecd <- run_all_specs(d_oecd, d_oecd_log, "OECD", d_all)
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
biv_specs <- bind_rows(biv_specs_all, biv_specs_oecd) %>% filter(!is.na(coef_raw))
# for backward compatibility
biv <- biv_specs %>% filter(tfr_coding == "level", time_ctrl == "year FE")
biv_log <- biv_specs %>% filter(tfr_coding == "log", time_ctrl == "year FE")
biv_specs %>%
select(variable, sample, tfr_coding, time_ctrl, coef_std, p, n) %>%
arrange(variable, sample, tfr_coding, time_ctrl)
Compare RMSE on the level TFR scale for both codings. If log RMSE < level RMSE, the predictor is better thought of as affecting fertility proportionally rather than absolutely.
compare_level_log <- function(data, label = "") {
predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()
map_df(predictors, function(xvar) {
d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()
if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) return(NULL)
fit_level <- feols(tfr ~ x | code + year, data = d_tmp)
fit_log <- feols(log(tfr) ~ x | code + year, data = d_tmp)
rmse_level <- sqrt(mean(residuals(fit_level)^2))
pred_level_from_log <- exp(log(d_tmp$tfr) - residuals(fit_log))
rmse_log <- sqrt(mean((d_tmp$tfr - pred_level_from_log)^2))
tibble(variable = xvar, sample = label,
rmse_level = rmse_level, rmse_log = rmse_log,
log_better = rmse_log < rmse_level,
rmse_ratio = rmse_log / rmse_level)
})
}
coding_all <- compare_level_log(d_all, "All countries")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
coding_oecd <- compare_level_log(d_oecd, "OECD")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
coding_results <- bind_rows(coding_all, coding_oecd)
coding_results %>%
select(variable, sample, rmse_level, rmse_log, rmse_ratio, log_better) %>%
arrange(sample, rmse_ratio)
coding_results %>%
ggplot(aes(x = rmse_level, y = rmse_log, label = variable, color = sample)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
geom_text_repel(size = 2.5) +
labs(x = "RMSE (level TFR model)", y = "RMSE (log TFR, back-transformed)",
title = "Level vs log TFR: RMSE on level scale",
subtitle = "Below diagonal = log fits better",
color = "Sample") +
theme(legend.position = "bottom")
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave("figs/level_vs_log_rmse.png", width = 10, height = 8, dpi = 150)
## Warning: ggrepel: 17 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Coefficient of variation (SD/|mean|) of raw betas across the 6 specs per TFR coding. Lower = more stable. Sign flips flagged separately.
stability <- biv_specs %>%
filter(!is.na(coef_raw)) %>%
group_by(variable, tfr_coding) %>%
filter(n() >= 3) %>%
summarise(
n_specs = n(),
mean_beta = mean(coef_raw),
sd_beta = sd(coef_raw),
cv = sd_beta / abs(mean_beta),
sign_flips = n_distinct(sign(coef_raw)) > 1,
.groups = "drop"
)
stability %>%
select(variable, tfr_coding, mean_beta, cv, sign_flips) %>%
arrange(tfr_coding, desc(cv))
stability %>%
mutate(variable = fct_reorder(variable, cv, .fun = mean)) %>%
ggplot(aes(x = cv, y = variable, color = tfr_coding, shape = sign_flips)) +
geom_point(size = 2, position = position_dodge(width = 0.4)) +
scale_shape_manual(values = c("TRUE" = 4, "FALSE" = 16)) +
scale_x_log10() +
labs(
x = "Coefficient of variation of raw beta across specs",
y = NULL,
title = "Stability of effects across specifications",
subtitle = "X = sign flips across specs; lower CV = more stable",
color = "TFR coding", shape = "Sign flip"
) +
theme(legend.position = "bottom")
ggsave("figs/stability_across_specs.png", width = 10, height = 9, dpi = 150)
run_spline_test <- function(data, label = "", spline_df = 4) {
predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()
map_df(predictors, function(xvar) {
d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()
if (nrow(d_tmp) < 100 || n_distinct(d_tmp$code) < 10) {
return(tibble(variable = xvar, sample = label,
r2w_linear = NA_real_, r2w_spline = NA_real_,
f_stat = NA_real_, p_nonlinear = NA_real_))
}
fit_lin <- feols(tfr ~ x | code + year, data = d_tmp)
fit_spl <- feols(tfr ~ ns(x, df = spline_df) | code + year, data = d_tmp)
# F-test: does spline improve over linear? (nested via RSS comparison)
rss_lin <- sum(residuals(fit_lin)^2)
rss_spl <- sum(residuals(fit_spl)^2)
df_diff <- spline_df - 1
df_resid <- fit_spl$nobs - fit_spl$nparams - length(fixef(fit_spl))
f_stat <- ((rss_lin - rss_spl) / df_diff) / (rss_spl / df_resid)
p_nonlinear <- pf(f_stat, df_diff, df_resid, lower.tail = FALSE)
tibble(
variable = xvar, sample = label,
r2w_linear = fitstat(fit_lin, "wr2")[[1]],
r2w_spline = fitstat(fit_spl, "wr2")[[1]],
f_stat = f_stat,
p_nonlinear = p_nonlinear
)
})
}
spline_all_level <- run_spline_test(d_all, "World, level TFR")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
spline_oecd_level <- run_spline_test(d_oecd, "OECD, level TFR")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
spline_all_log <- run_spline_test(d_all_log, "World, log TFR")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
spline_oecd_log <- run_spline_test(d_oecd_log, "OECD, log TFR")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
spline_results <- bind_rows(spline_all_level, spline_oecd_level, spline_all_log, spline_oecd_log) %>%
filter(!is.na(r2w_linear))
spline_results %>%
mutate(r2w_gain = r2w_spline - r2w_linear) %>%
select(variable, sample, r2w_linear, r2w_spline, r2w_gain, f_stat, p_nonlinear) %>%
arrange(sample, p_nonlinear)
spline_results %>%
mutate(sample = fct_relevel(sample, "World, level TFR", "OECD, level TFR", "World, log TFR", "OECD, log TFR")) %>%
ggplot(aes(x = r2w_linear, y = r2w_spline, label = variable)) +
geom_point(size = 1.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
geom_text_repel(size = 2) +
facet_wrap(~sample, nrow = 2) +
labs(
x = "Within R-squared (linear)", y = "Within R-squared (spline, df=4)",
title = "Nonlinearity: spline vs linear"
)
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave("figs/spline_vs_linear.png", width = 12, height = 10, dpi = 150)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
12 bivariate specs: {level TFR, log TFR} × {no year, linear year, year FE} × {World, OECD}. All standardized to world yr-2000 SD.
# summary table: sign consistency and significance count
consistency <- biv_specs %>%
mutate(spec = paste0(sample, "_", tfr_coding, "_", time_ctrl)) %>%
select(variable, spec, coef_std, p) %>%
filter(!is.na(coef_std)) %>%
group_by(variable) %>%
summarise(
n_specs = n(),
n_positive = sum(coef_std > 0),
n_negative = sum(coef_std < 0),
consistent_sign = n_positive == n_specs | n_negative == n_specs,
n_sig = sum(p < 0.05),
mean_std_beta = mean(coef_std),
.groups = "drop"
) %>%
arrange(desc(consistent_sign), desc(n_sig))
consistency
# consistency plot
var_order <- biv_specs %>%
filter(!is.na(coef_std), tfr_coding == "level") %>%
group_by(variable) %>%
summarise(mean_abs = mean(abs(coef_std))) %>%
arrange(mean_abs) %>%
pull(variable)
p_level <- biv_specs %>%
filter(!is.na(coef_std), tfr_coding == "level") %>%
mutate(
time_ctrl = fct_relevel(time_ctrl, "none", "linear", "year FE"),
variable = factor(variable, levels = var_order)
) %>%
ggplot(aes(x = coef_std, y = variable, color = time_ctrl, shape = sample)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_point(position = position_dodge(width = 0.6), size = 2) +
scale_shape_manual(values = c("All countries" = 16, "OECD" = 1)) +
labs(x = "Standardized beta", y = NULL, title = "Level TFR",
color = "Time control", shape = "Sample")
p_log <- biv_specs %>%
filter(!is.na(coef_std), tfr_coding == "log") %>%
mutate(
time_ctrl = fct_relevel(time_ctrl, "none", "linear", "year FE"),
variable = factor(variable, levels = var_order)
) %>%
ggplot(aes(x = coef_std, y = variable, color = time_ctrl, shape = sample)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_point(position = position_dodge(width = 0.6), size = 2) +
scale_shape_manual(values = c("All countries" = 16, "OECD" = 1)) +
labs(x = "Standardized beta", y = NULL, title = "Log TFR") +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
guides(color = "none", shape = "none")
p_level + p_log +
plot_layout(guides = "collect") & theme(legend.position = "bottom")
ggsave("figs/consistency_across_specs.png", width = 14, height = 9, dpi = 150)
Random slopes model estimates how much the effect of each predictor varies across countries, separating true heterogeneity (τ²) from sampling error. A truly causal effect should be relatively homogeneous.
run_slope_heterogeneity <- function(data, label = "") {
predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()
map_df(predictors, function(xvar) {
d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()
if (nrow(d_tmp) < 100 || n_distinct(d_tmp$code) < 10) {
return(tibble(variable = xvar, sample = label,
fixed_slope = NA_real_, tau_slope = NA_real_,
slope_ratio = NA_real_))
}
# random intercept + random slope by country, with linear year trend
fit <- tryCatch(
suppressWarnings(lmer(tfr ~ x + year + (1 + x | code), data = d_tmp, REML = TRUE,
control = lmerControl(optimizer = "bobyqa"))),
error = function(e) NULL
)
if (is.null(fit)) {
return(tibble(variable = xvar, sample = label,
fixed_slope = NA_real_, tau_slope = NA_real_,
slope_ratio = NA_real_))
}
vc <- as.data.frame(VarCorr(fit))
tau_slope <- vc$sdcor[vc$var1 == "x" & is.na(vc$var2)]
fixed_slope <- fixef(fit)["x"]
tibble(
variable = xvar, sample = label,
fixed_slope = fixed_slope,
tau_slope = tau_slope,
slope_ratio = tau_slope / abs(fixed_slope) # CV-like: how variable is the slope?
)
})
}
het_all <- run_slope_heterogeneity(d_all, "All countries")
het_oecd <- run_slope_heterogeneity(d_oecd, "OECD")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
het_results <- bind_rows(het_all, het_oecd) %>% filter(!is.na(fixed_slope))
het_results %>%
select(variable, sample, fixed_slope, tau_slope, slope_ratio) %>%
arrange(sample, slope_ratio)
het_results %>%
filter(!is.na(slope_ratio)) %>%
mutate(variable = fct_reorder(variable, slope_ratio, .fun = mean)) %>%
ggplot(aes(x = slope_ratio, y = variable, color = sample)) +
geom_point(size = 2, position = position_dodge(width = 0.4)) +
geom_vline(xintercept = 1, linetype = "dashed", color = "grey50") +
scale_x_log10() +
labs(
x = "Slope heterogeneity (Ï„ / |fixed slope|)",
y = NULL,
title = "Between-country slope heterogeneity",
subtitle = "Random slopes model. < 1 = effect is consistent across countries",
color = "Sample"
) +
theme(legend.position = "bottom")
ggsave("figs/slope_heterogeneity.png", width = 10, height = 9, dpi = 150)
run_multi_fe <- function(data, min_coverage = 0.5, label = "") {
predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()
cov_df <- tibble(
variable = predictors,
coverage = map_dbl(predictors, ~mean(!is.na(data[[.x]])))
) %>% filter(coverage > min_coverage)
good_vars <- cov_df$variable
cat(label, "- vars with >", min_coverage * 100, "% coverage:", paste(good_vars, collapse = ", "), "\n")
fml_fe <- as.formula(paste("tfr ~", paste(good_vars, collapse = " + "), "| code"))
fml_twfe <- as.formula(paste("tfr ~", paste(good_vars, collapse = " + "), "| code + year"))
d_m <- data %>% select(tfr, code, year, all_of(good_vars)) %>% drop_na()
cat(label, "- sample:", nrow(d_m), "obs,", n_distinct(d_m$code), "countries\n")
fit_fe <- feols(fml_fe, data = d_m)
fit_twfe <- feols(fml_twfe, data = d_m)
etable(fit_fe, fit_twfe, headers = c(paste(label, "Country FE"), paste(label, "Country+Year FE")))
}
run_multi_fe(d_all, label = "All countries")
## All countries - vars with > 50 % coverage: child_mortality, cpi, gdp_pc_growth, gdp_pc_ppp, infant_mortality, life_expectancy, median_age, old_age_dependency, urbanization, women_parliament
## All countries - sample: 6115 obs, 155 countries
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
run_multi_fe(d_oecd, label = "OECD")
## OECD - vars with > 50 % coverage: child_mortality, cpi, female_emp, female_lfp, gdp_pc_growth, gdp_pc_ppp, gdp_pc, govt_expenditure, infant_mortality, life_expectancy, median_age, old_age_dependency, social_spending, tertiary_enroll, unemployment, urbanization, women_parliament, housing_price_income, real_house_price
## OECD - sample: 641 obs, 33 countries
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
# within R² from TWFE bivariate models (level TFR only)
wr2_results <- biv_specs %>%
filter(tfr_coding == "level", time_ctrl == "year FE", !is.na(coef_raw)) %>%
select(variable, sample) %>%
pmap_df(function(variable, sample) {
data <- if (sample == "All countries") d_all else d_oecd
d_tmp <- data %>% select(tfr, code, year, x = all_of(variable)) %>% drop_na()
if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) return(NULL)
fit <- feols(tfr ~ x | code + year, data = d_tmp)
tibble(variable = variable, sample = sample, wr2 = fitstat(fit, "wr2")[[1]])
})
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
# combine all summary metrics
summary_table <- stability %>%
filter(tfr_coding == "level") %>%
select(variable, cv_level = cv, sign_flip_level = sign_flips) %>%
left_join(
stability %>% filter(tfr_coding == "log") %>% select(variable, cv_log = cv, sign_flip_log = sign_flips),
by = "variable"
) %>%
left_join(
biv_specs %>%
filter(tfr_coding == "level", time_ctrl == "year FE") %>%
group_by(variable) %>%
summarise(mean_std_beta = mean(coef_std, na.rm = TRUE), .groups = "drop"),
by = "variable"
) %>%
left_join(
wr2_results %>%
pivot_wider(names_from = sample, values_from = wr2, names_prefix = "wr2_"),
by = "variable"
) %>%
left_join(
het_results %>%
group_by(variable) %>%
summarise(mean_het = mean(slope_ratio, na.rm = TRUE), .groups = "drop"),
by = "variable"
) %>%
mutate(
sign_flip = sign_flip_level | sign_flip_log,
mean_cv = (cv_level + cv_log) / 2
) %>%
select(variable, mean_std_beta, mean_cv, sign_flip,
`wr2_All countries`, wr2_OECD, mean_het) %>%
arrange(desc(abs(mean_std_beta)))
summary_table
# rank variables across metrics (higher = better)
summary_table <- summary_table %>%
mutate(
rank_beta = rank(-abs(mean_std_beta)), # larger abs beta = better
rank_cv = rank(mean_cv), # lower CV = better
rank_wr2_all = rank(-replace_na(`wr2_All countries`, 0)), # larger wr2 = better
rank_wr2_oecd = rank(-replace_na(wr2_OECD, 0)), # larger wr2 = better
rank_het = rank(replace_na(mean_het, Inf)), # lower heterogeneity = better
mean_rank = (rank_beta + rank_cv + rank_wr2_all + rank_wr2_oecd + rank_het) / 5
) %>%
arrange(mean_rank)
summary_table %>% select(variable, mean_std_beta, mean_cv, sign_flip, `wr2_All countries`, wr2_OECD, mean_het, mean_rank)
var_rank_order <- summary_table %>% arrange(desc(mean_rank)) %>% pull(variable)
summary_long <- summary_table %>%
pivot_longer(c(mean_std_beta, mean_cv, `wr2_All countries`, wr2_OECD, mean_het),
names_to = "metric", values_to = "value") %>%
mutate(
metric = recode(metric,
mean_std_beta = "Std beta (mean)",
mean_cv = "CV (mean)",
`wr2_All countries` = "Within R² (World)",
wr2_OECD = "Within R² (OECD)",
mean_het = "Slope heterogeneity (τ/|β|)"
),
metric = fct_relevel(metric, "Std beta (mean)", "CV (mean)", "Within R² (World)", "Within R² (OECD)", "Slope heterogeneity (τ/|β|)"),
variable = factor(variable, levels = var_rank_order)
)
summary_long %>%
ggplot(aes(x = value, y = variable)) +
geom_col() +
facet_wrap(~metric, scales = "free_x", nrow = 1) +
labs(y = NULL, x = NULL,
title = "Summary: effect size, stability, and explanatory power") +
theme(strip.text = element_text(size = 9))
ggsave("figs/summary_table_plot.png", width = 16, height = 9, dpi = 150)
#versions
write_sessioninfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] splines stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lme4_1.1-37 Matrix_1.7-4 patchwork_1.3.2.9000
## [4] ggrepel_0.9.6 glue_1.8.0 fixest_0.13.2
## [7] kirkegaard_2025-11-19 robustbase_0.99-6 psych_2.5.6
## [10] assertthat_0.2.1 weights_1.1.2 magrittr_2.0.4
## [13] lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [16] dplyr_1.1.4 purrr_1.2.0 readr_2.1.5
## [19] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.1.9000
## [22] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] Rdpack_2.6.4 mnormt_2.1.1 gridExtra_2.3
## [4] sandwich_3.1-1 rlang_1.1.6.9000 multcomp_1.4-28
## [7] dreamerr_1.5.0 compiler_4.5.3 gdata_3.0.1
## [10] systemfonts_1.3.1 vctrs_0.6.5 crayon_1.5.3
## [13] pkgconfig_2.0.3 shape_1.4.6.1 fastmap_1.2.0
## [16] backports_1.5.0 labeling_0.4.3 rmarkdown_2.30
## [19] tzdb_0.5.0 nloptr_2.2.1 ragg_1.5.0
## [22] bit_4.6.0 xfun_0.57 glmnet_4.1-10
## [25] jomo_2.7-6 cachem_1.1.0 jsonlite_2.0.0
## [28] stringmagic_1.2.0 pan_1.9 broom_1.0.11
## [31] parallel_4.5.3 cluster_2.1.8.2 R6_2.6.1
## [34] bslib_0.9.0 stringi_1.8.7 RColorBrewer_1.1-3
## [37] boot_1.3-32 rpart_4.1.24 jquerylib_0.1.4
## [40] numDeriv_2016.8-1.1 estimability_1.5.1 Rcpp_1.1.0
## [43] iterators_1.0.14 knitr_1.50 zoo_1.8-14
## [46] base64enc_0.1-3 nnet_7.3-20 timechange_0.3.0
## [49] tidyselect_1.2.1 rstudioapi_0.17.1 yaml_2.3.10
## [52] codetools_0.2-20 lattice_0.22-7 withr_3.0.2
## [55] S7_0.2.1 coda_0.19-4.1 evaluate_1.0.5
## [58] foreign_0.8-91 archive_1.1.12 survival_3.8-6
## [61] pillar_1.11.1 mice_3.18.0 checkmate_2.3.3
## [64] foreach_1.5.2 reformulas_0.4.1 generics_0.1.4
## [67] vroom_1.6.6 hms_1.1.3 scales_1.4.0
## [70] minqa_1.2.8 gtools_3.9.5 xtable_1.8-4
## [73] emmeans_1.11.2-8 Hmisc_5.2-4 tools_4.5.3
## [76] data.table_1.17.8 mvtnorm_1.3-3 grid_4.5.3
## [79] rbibutils_2.3 colorspace_2.1-2 nlme_3.1-168
## [82] htmlTable_2.4.3 Formula_1.2-5 cli_3.6.5
## [85] textshaping_1.0.4 gtable_0.3.6 DEoptimR_1.1-4
## [88] sass_0.4.10 digest_0.6.39 TH.data_1.1-4
## [91] htmlwidgets_1.6.4 farver_2.1.2 htmltools_0.5.8.1
## [94] lifecycle_1.0.4 mitml_0.4-5 bit64_4.6.0-1
## [97] MASS_7.3-65
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}