Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  tidyverse,
  fixest,
  glue,
  ggrepel,
  splines,
  patchwork,
  lme4
)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

# read an OWID CSV, extract the value column(s), return entity/code/year/value
read_owid <- function(path, var_name = NULL) {
  if (is.null(var_name)) var_name <- tools::file_path_sans_ext(basename(path))

  d <- read_csv(path, show_col_types = FALSE)

  # drop annotation and region columns
  d <- d %>% select(-matches("annotation|owid_region"))

  # the value column is whatever isn't entity/code/year
  value_cols <- setdiff(names(d), c("entity", "code", "year"))

  if (length(value_cols) == 1) {
    d <- d %>% rename(!!var_name := all_of(value_cols))
  } else {
    # multiple value cols: take first numeric one
    for (vc in value_cols) {
      if (is.numeric(d[[vc]])) {
        d <- d %>% select(entity, code, year, !!var_name := all_of(vc))
        break
      }
    }
  }

  d %>% select(entity, code, year, all_of(var_name))
}

# read OECD SDMX-format CSV, aggregate quarterly to annual
read_oecd_housing <- function(path) {
  d <- read_csv(path, show_col_types = FALSE)

  # price-to-income ratio
  d_yieldh <- d %>%
    filter(MEASURE == "HPI_YDH", FREQ == "Q") %>%
    mutate(year = as.integer(str_extract(TIME_PERIOD, "^\\d{4}")), code = REF_AREA) %>%
    group_by(code, year) %>%
    summarise(housing_price_income = mean(OBS_VALUE, na.rm = TRUE), .groups = "drop")

  # real house prices (broader coverage, includes Turkey)
  d_rhp <- d %>%
    filter(MEASURE == "RHP", FREQ == "Q") %>%
    mutate(year = as.integer(str_extract(TIME_PERIOD, "^\\d{4}")), code = REF_AREA) %>%
    group_by(code, year) %>%
    summarise(real_house_price = mean(OBS_VALUE, na.rm = TRUE), .groups = "drop")

  full_join(d_yieldh, d_rhp, by = c("code", "year"))
}

# standardize by cross-sectional SD in reference year (2000)
# ref_data: optionally use a different dataset for the SD (e.g., world sample for OECD)
standardize_cs_sd <- function(data, varname, ref_year = 2000, ref_data = NULL) {
  if (is.null(ref_data)) ref_data <- data
  sd_ref <- sd(ref_data[[varname]][ref_data$year == ref_year], na.rm = TRUE)
  if (is.na(sd_ref) || sd_ref == 0) return(rep(NA_real_, nrow(data)))
  data[[varname]] / sd_ref
}

# run bivariate FE regressions, both raw and standardized
# time_control: "none" = country FE only, "linear" = country FE + linear year, "fe" = country + year FE
# ref_data: dataset to use for computing the standardization SD
run_biv_fe <- function(data, label = "", ref_data = NULL, time_control = "fe") {
  predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()

  map_df(predictors, function(xvar) {
    d_tmp <- data %>%
      mutate(x_std = standardize_cs_sd(., xvar, ref_data = ref_data)) %>%
      select(tfr, code, year, x = all_of(xvar), x_std) %>%
      drop_na()

    if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) {
      return(tibble(variable = xvar, sample = label, n = nrow(d_tmp),
                    n_countries = n_distinct(d_tmp$code),
                    coef_raw = NA_real_, se_raw = NA_real_,
                    coef_std = NA_real_, se_std = NA_real_, p = NA_real_))
    }

    fml_raw <- switch(time_control,
      none = tfr ~ x | code,
      linear = tfr ~ x + year | code,
      fe = tfr ~ x | code + year
    )
    fml_std <- switch(time_control,
      none = tfr ~ x_std | code,
      linear = tfr ~ x_std + year | code,
      fe = tfr ~ x_std | code + year
    )

    fit_raw <- feols(fml_raw, data = d_tmp)
    fit_std <- feols(fml_std, data = d_tmp)
    s_raw <- summary(fit_raw)
    s_std <- summary(fit_std)

    tibble(
      variable = xvar, sample = label,
      n = nrow(d_tmp), n_countries = n_distinct(d_tmp$code),
      coef_raw = coef(fit_raw)["x"], se_raw = s_raw$se["x"],
      coef_std = coef(fit_std)["x_std"], se_std = s_std$se["x_std"],
      p = s_raw$coeftable["x", "Pr(>|t|)"]
    )
  })
}

# OECD member country codes (excluding middle-income members: COL, CRI, MEX, CHL, TUR)
oecd_codes <- c(
  "AUS", "AUT", "BEL", "CAN", "CZE", "DNK", "EST",
  "FIN", "FRA", "DEU", "GRC", "HUN", "ISL", "IRL", "ISR", "ITA", "JPN",
  "KOR", "LVA", "LTU", "LUX", "NLD", "NZL", "NOR", "POL", "PRT",
  "SVK", "SVN", "ESP", "SWE", "CHE", "GBR", "USA"
)

Data

# read all OWID CSVs from data/owid_raw/
raw_dir <- "data/owid_raw"
csv_files <- list.files(raw_dir, pattern = "\\.csv$", full.names = TRUE)
owid_list <- map(csv_files, read_owid)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
names(owid_list) <- tools::file_path_sans_ext(basename(csv_files))
map_df(owid_list, ~tibble(rows = nrow(.x), cols = ncol(.x)), .id = "indicator")
# read OECD housing data (price-to-income + real house prices)
d_housing <- read_oecd_housing("data/OECD.ECO.MPD,DSD_AN_HOUSE_PRICES@DF_HOUSE_PRICES,1.0+all.csv")

# drop variables with known scaling/interpretation issues
owid_list[["pop_density"]] <- NULL

# join all indicators into one panel
d <- owid_list %>%
  reduce(full_join, by = c("entity", "code", "year")) %>%
  left_join(d_housing, by = c("code", "year"))

# filter to 1960+ (pre-1960 data is sparse and includes dodgy historical estimates)
d <- d %>% filter(year >= 1960)

# two samples
d_all <- d %>% filter(!is.na(tfr))
d_oecd <- d %>% filter(code %in% oecd_codes, !is.na(tfr))

cat("All countries:", nrow(d_all), "rows,", n_distinct(d_all$code), "countries\n")
## All countries: 16576 rows, 255 countries
cat("OECD only:", nrow(d_oecd), "rows,", n_distinct(d_oecd$code), "countries\n")
## OECD only: 2112 rows, 33 countries
# coverage per variable in each sample
bind_rows(
  d_all %>% select(-entity, -code, -year) %>%
    summarise(across(everything(), ~mean(!is.na(.x)))) %>%
    pivot_longer(everything(), names_to = "variable", values_to = "coverage") %>%
    mutate(sample = "all"),
  d_oecd %>% select(-entity, -code, -year) %>%
    summarise(across(everything(), ~mean(!is.na(.x)))) %>%
    pivot_longer(everything(), names_to = "variable", values_to = "coverage") %>%
    mutate(sample = "oecd")
) %>%
  pivot_wider(names_from = sample, values_from = coverage) %>%
  arrange(desc(all))

Analysis

Bivariate FE: all specifications

6 specs: {level TFR, log TFR} × {no year control, linear year, year FE}, both samples standardized to world yr-2000 SD.

d_all_log <- d_all %>% mutate(tfr = log(tfr))
d_oecd_log <- d_oecd %>% mutate(tfr = log(tfr))

# run all 6 specs × 2 samples = 12 variants
run_all_specs <- function(d_level, d_log, sample_label, ref_data) {
  bind_rows(
    run_biv_fe(d_level, sample_label, ref_data, time_control = "none") %>% mutate(tfr_coding = "level", time_ctrl = "none"),
    run_biv_fe(d_level, sample_label, ref_data, time_control = "linear") %>% mutate(tfr_coding = "level", time_ctrl = "linear"),
    run_biv_fe(d_level, sample_label, ref_data, time_control = "fe") %>% mutate(tfr_coding = "level", time_ctrl = "year FE"),
    run_biv_fe(d_log, sample_label, ref_data, time_control = "none") %>% mutate(tfr_coding = "log", time_ctrl = "none"),
    run_biv_fe(d_log, sample_label, ref_data, time_control = "linear") %>% mutate(tfr_coding = "log", time_ctrl = "linear"),
    run_biv_fe(d_log, sample_label, ref_data, time_control = "fe") %>% mutate(tfr_coding = "log", time_ctrl = "year FE")
  )
}

biv_specs_all <- run_all_specs(d_all, d_all_log, "All countries", d_all)
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 16 fixed-effect singletons were removed (16 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## NOTE: 5 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## NOTE: 41 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 9 fixed-effect singletons were removed (9 observations).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
biv_specs_oecd <- run_all_specs(d_oecd, d_oecd_log, "OECD", d_all)
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## NOTE: 6 fixed-effect singletons were removed (6 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
biv_specs <- bind_rows(biv_specs_all, biv_specs_oecd) %>% filter(!is.na(coef_raw))

# for backward compatibility
biv <- biv_specs %>% filter(tfr_coding == "level", time_ctrl == "year FE")
biv_log <- biv_specs %>% filter(tfr_coding == "log", time_ctrl == "year FE")

biv_specs %>%
  select(variable, sample, tfr_coding, time_ctrl, coef_std, p, n) %>%
  arrange(variable, sample, tfr_coding, time_ctrl)

Level vs log TFR: which fits better?

Compare RMSE on the level TFR scale for both codings. If log RMSE < level RMSE, the predictor is better thought of as affecting fertility proportionally rather than absolutely.

compare_level_log <- function(data, label = "") {
  predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()

  map_df(predictors, function(xvar) {
    d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()
    if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) return(NULL)

    fit_level <- feols(tfr ~ x | code + year, data = d_tmp)
    fit_log <- feols(log(tfr) ~ x | code + year, data = d_tmp)

    rmse_level <- sqrt(mean(residuals(fit_level)^2))
    pred_level_from_log <- exp(log(d_tmp$tfr) - residuals(fit_log))
    rmse_log <- sqrt(mean((d_tmp$tfr - pred_level_from_log)^2))

    tibble(variable = xvar, sample = label,
           rmse_level = rmse_level, rmse_log = rmse_log,
           log_better = rmse_log < rmse_level,
           rmse_ratio = rmse_log / rmse_level)
  })
}

coding_all <- compare_level_log(d_all, "All countries")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
coding_oecd <- compare_level_log(d_oecd, "OECD")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## Warning in log(d_tmp$tfr) - residuals(fit_log): longer object length is not a
## multiple of shorter object length
coding_results <- bind_rows(coding_all, coding_oecd)

coding_results %>%
  select(variable, sample, rmse_level, rmse_log, rmse_ratio, log_better) %>%
  arrange(sample, rmse_ratio)
coding_results %>%
  ggplot(aes(x = rmse_level, y = rmse_log, label = variable, color = sample)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
  geom_text_repel(size = 2.5) +
  labs(x = "RMSE (level TFR model)", y = "RMSE (log TFR, back-transformed)",
       title = "Level vs log TFR: RMSE on level scale",
       subtitle = "Below diagonal = log fits better",
       color = "Sample") +
  theme(legend.position = "bottom")
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("figs/level_vs_log_rmse.png", width = 10, height = 8, dpi = 150)
## Warning: ggrepel: 17 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Stability across specs

Coefficient of variation (SD/|mean|) of raw betas across the 6 specs per TFR coding. Lower = more stable. Sign flips flagged separately.

stability <- biv_specs %>%
  filter(!is.na(coef_raw)) %>%
  group_by(variable, tfr_coding) %>%
  filter(n() >= 3) %>%
  summarise(
    n_specs = n(),
    mean_beta = mean(coef_raw),
    sd_beta = sd(coef_raw),
    cv = sd_beta / abs(mean_beta),
    sign_flips = n_distinct(sign(coef_raw)) > 1,
    .groups = "drop"
  )

stability %>%
  select(variable, tfr_coding, mean_beta, cv, sign_flips) %>%
  arrange(tfr_coding, desc(cv))
stability %>%
  mutate(variable = fct_reorder(variable, cv, .fun = mean)) %>%
  ggplot(aes(x = cv, y = variable, color = tfr_coding, shape = sign_flips)) +
  geom_point(size = 2, position = position_dodge(width = 0.4)) +
  scale_shape_manual(values = c("TRUE" = 4, "FALSE" = 16)) +
  scale_x_log10() +
  labs(
    x = "Coefficient of variation of raw beta across specs",
    y = NULL,
    title = "Stability of effects across specifications",
    subtitle = "X = sign flips across specs; lower CV = more stable",
    color = "TFR coding", shape = "Sign flip"
  ) +
  theme(legend.position = "bottom")

ggsave("figs/stability_across_specs.png", width = 10, height = 9, dpi = 150)

Nonlinearity: spline vs linear

run_spline_test <- function(data, label = "", spline_df = 4) {
  predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()

  map_df(predictors, function(xvar) {
    d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()

    if (nrow(d_tmp) < 100 || n_distinct(d_tmp$code) < 10) {
      return(tibble(variable = xvar, sample = label,
                    r2w_linear = NA_real_, r2w_spline = NA_real_,
                    f_stat = NA_real_, p_nonlinear = NA_real_))
    }

    fit_lin <- feols(tfr ~ x | code + year, data = d_tmp)
    fit_spl <- feols(tfr ~ ns(x, df = spline_df) | code + year, data = d_tmp)

    # F-test: does spline improve over linear? (nested via RSS comparison)
    rss_lin <- sum(residuals(fit_lin)^2)
    rss_spl <- sum(residuals(fit_spl)^2)
    df_diff <- spline_df - 1
    df_resid <- fit_spl$nobs - fit_spl$nparams - length(fixef(fit_spl))
    f_stat <- ((rss_lin - rss_spl) / df_diff) / (rss_spl / df_resid)
    p_nonlinear <- pf(f_stat, df_diff, df_resid, lower.tail = FALSE)

    tibble(
      variable = xvar, sample = label,
      r2w_linear = fitstat(fit_lin, "wr2")[[1]],
      r2w_spline = fitstat(fit_spl, "wr2")[[1]],
      f_stat = f_stat,
      p_nonlinear = p_nonlinear
    )
  })
}

spline_all_level <- run_spline_test(d_all, "World, level TFR")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
spline_oecd_level <- run_spline_test(d_oecd, "OECD, level TFR")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
spline_all_log <- run_spline_test(d_all_log, "World, log TFR")
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
spline_oecd_log <- run_spline_test(d_oecd_log, "OECD, log TFR")
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
spline_results <- bind_rows(spline_all_level, spline_oecd_level, spline_all_log, spline_oecd_log) %>%
  filter(!is.na(r2w_linear))

spline_results %>%
  mutate(r2w_gain = r2w_spline - r2w_linear) %>%
  select(variable, sample, r2w_linear, r2w_spline, r2w_gain, f_stat, p_nonlinear) %>%
  arrange(sample, p_nonlinear)
spline_results %>%
  mutate(sample = fct_relevel(sample, "World, level TFR", "OECD, level TFR", "World, log TFR", "OECD, log TFR")) %>%
  ggplot(aes(x = r2w_linear, y = r2w_spline, label = variable)) +
  geom_point(size = 1.5) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
  geom_text_repel(size = 2) +
  facet_wrap(~sample, nrow = 2) +
  labs(
    x = "Within R-squared (linear)", y = "Within R-squared (spline, df=4)",
    title = "Nonlinearity: spline vs linear"
  )
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps

ggsave("figs/spline_vs_linear.png", width = 12, height = 10, dpi = 150)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Consistency across specifications

12 bivariate specs: {level TFR, log TFR} × {no year, linear year, year FE} × {World, OECD}. All standardized to world yr-2000 SD.

# summary table: sign consistency and significance count
consistency <- biv_specs %>%
  mutate(spec = paste0(sample, "_", tfr_coding, "_", time_ctrl)) %>%
  select(variable, spec, coef_std, p) %>%
  filter(!is.na(coef_std)) %>%
  group_by(variable) %>%
  summarise(
    n_specs = n(),
    n_positive = sum(coef_std > 0),
    n_negative = sum(coef_std < 0),
    consistent_sign = n_positive == n_specs | n_negative == n_specs,
    n_sig = sum(p < 0.05),
    mean_std_beta = mean(coef_std),
    .groups = "drop"
  ) %>%
  arrange(desc(consistent_sign), desc(n_sig))

consistency
# consistency plot
var_order <- biv_specs %>%
  filter(!is.na(coef_std), tfr_coding == "level") %>%
  group_by(variable) %>%
  summarise(mean_abs = mean(abs(coef_std))) %>%
  arrange(mean_abs) %>%
  pull(variable)

p_level <- biv_specs %>%
  filter(!is.na(coef_std), tfr_coding == "level") %>%
  mutate(
    time_ctrl = fct_relevel(time_ctrl, "none", "linear", "year FE"),
    variable = factor(variable, levels = var_order)
  ) %>%
  ggplot(aes(x = coef_std, y = variable, color = time_ctrl, shape = sample)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  geom_point(position = position_dodge(width = 0.6), size = 2) +
  scale_shape_manual(values = c("All countries" = 16, "OECD" = 1)) +
  labs(x = "Standardized beta", y = NULL, title = "Level TFR",
       color = "Time control", shape = "Sample")

p_log <- biv_specs %>%
  filter(!is.na(coef_std), tfr_coding == "log") %>%
  mutate(
    time_ctrl = fct_relevel(time_ctrl, "none", "linear", "year FE"),
    variable = factor(variable, levels = var_order)
  ) %>%
  ggplot(aes(x = coef_std, y = variable, color = time_ctrl, shape = sample)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  geom_point(position = position_dodge(width = 0.6), size = 2) +
  scale_shape_manual(values = c("All countries" = 16, "OECD" = 1)) +
  labs(x = "Standardized beta", y = NULL, title = "Log TFR") +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  guides(color = "none", shape = "none")

p_level + p_log +
  plot_layout(guides = "collect") & theme(legend.position = "bottom")

ggsave("figs/consistency_across_specs.png", width = 14, height = 9, dpi = 150)

Slope heterogeneity (random slopes)

Random slopes model estimates how much the effect of each predictor varies across countries, separating true heterogeneity (τ²) from sampling error. A truly causal effect should be relatively homogeneous.

run_slope_heterogeneity <- function(data, label = "") {
  predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()

  map_df(predictors, function(xvar) {
    d_tmp <- data %>% select(tfr, code, year, x = all_of(xvar)) %>% drop_na()

    if (nrow(d_tmp) < 100 || n_distinct(d_tmp$code) < 10) {
      return(tibble(variable = xvar, sample = label,
                    fixed_slope = NA_real_, tau_slope = NA_real_,
                    slope_ratio = NA_real_))
    }

    # random intercept + random slope by country, with linear year trend
    fit <- tryCatch(
      suppressWarnings(lmer(tfr ~ x + year + (1 + x | code), data = d_tmp, REML = TRUE,
           control = lmerControl(optimizer = "bobyqa"))),
      error = function(e) NULL
    )

    if (is.null(fit)) {
      return(tibble(variable = xvar, sample = label,
                    fixed_slope = NA_real_, tau_slope = NA_real_,
                    slope_ratio = NA_real_))
    }

    vc <- as.data.frame(VarCorr(fit))
    tau_slope <- vc$sdcor[vc$var1 == "x" & is.na(vc$var2)]
    fixed_slope <- fixef(fit)["x"]

    tibble(
      variable = xvar, sample = label,
      fixed_slope = fixed_slope,
      tau_slope = tau_slope,
      slope_ratio = tau_slope / abs(fixed_slope)  # CV-like: how variable is the slope?
    )
  })
}

het_all <- run_slope_heterogeneity(d_all, "All countries")
het_oecd <- run_slope_heterogeneity(d_oecd, "OECD")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
het_results <- bind_rows(het_all, het_oecd) %>% filter(!is.na(fixed_slope))

het_results %>%
  select(variable, sample, fixed_slope, tau_slope, slope_ratio) %>%
  arrange(sample, slope_ratio)
het_results %>%
  filter(!is.na(slope_ratio)) %>%
  mutate(variable = fct_reorder(variable, slope_ratio, .fun = mean)) %>%
  ggplot(aes(x = slope_ratio, y = variable, color = sample)) +
  geom_point(size = 2, position = position_dodge(width = 0.4)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "grey50") +
  scale_x_log10() +
  labs(
    x = "Slope heterogeneity (Ï„ / |fixed slope|)",
    y = NULL,
    title = "Between-country slope heterogeneity",
    subtitle = "Random slopes model. < 1 = effect is consistent across countries",
    color = "Sample"
  ) +
  theme(legend.position = "bottom")

ggsave("figs/slope_heterogeneity.png", width = 10, height = 9, dpi = 150)

Multivariate FE models

run_multi_fe <- function(data, min_coverage = 0.5, label = "") {
  predictors <- data %>% select(-entity, -code, -year, -tfr) %>% names()

  cov_df <- tibble(
    variable = predictors,
    coverage = map_dbl(predictors, ~mean(!is.na(data[[.x]])))
  ) %>% filter(coverage > min_coverage)

  good_vars <- cov_df$variable
  cat(label, "- vars with >", min_coverage * 100, "% coverage:", paste(good_vars, collapse = ", "), "\n")

  fml_fe <- as.formula(paste("tfr ~", paste(good_vars, collapse = " + "), "| code"))
  fml_twfe <- as.formula(paste("tfr ~", paste(good_vars, collapse = " + "), "| code + year"))

  d_m <- data %>% select(tfr, code, year, all_of(good_vars)) %>% drop_na()
  cat(label, "- sample:", nrow(d_m), "obs,", n_distinct(d_m$code), "countries\n")

  fit_fe <- feols(fml_fe, data = d_m)
  fit_twfe <- feols(fml_twfe, data = d_m)

  etable(fit_fe, fit_twfe, headers = c(paste(label, "Country FE"), paste(label, "Country+Year FE")))
}

run_multi_fe(d_all, label = "All countries")
## All countries - vars with > 50 % coverage: child_mortality, cpi, gdp_pc_growth, gdp_pc_ppp, infant_mortality, life_expectancy, median_age, old_age_dependency, urbanization, women_parliament 
## All countries - sample: 6115 obs, 155 countries
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
run_multi_fe(d_oecd, label = "OECD")
## OECD - vars with > 50 % coverage: child_mortality, cpi, female_emp, female_lfp, gdp_pc_growth, gdp_pc_ppp, gdp_pc, govt_expenditure, infant_mortality, life_expectancy, median_age, old_age_dependency, social_spending, tertiary_enroll, unemployment, urbanization, women_parliament, housing_price_income, real_house_price 
## OECD - sample: 641 obs, 33 countries
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).
## Warning: The VCOV matrix is not positive semi-definite and was 'fixed' (see
## ?vcov).

Summary table

# within R² from TWFE bivariate models (level TFR only)
wr2_results <- biv_specs %>%
  filter(tfr_coding == "level", time_ctrl == "year FE", !is.na(coef_raw)) %>%
  select(variable, sample) %>%
  pmap_df(function(variable, sample) {
    data <- if (sample == "All countries") d_all else d_oecd
    d_tmp <- data %>% select(tfr, code, year, x = all_of(variable)) %>% drop_na()
    if (nrow(d_tmp) < 50 || n_distinct(d_tmp$code) < 5) return(NULL)
    fit <- feols(tfr ~ x | code + year, data = d_tmp)
    tibble(variable = variable, sample = sample, wr2 = fitstat(fit, "wr2")[[1]])
  })
## NOTE: 16/2 fixed-effect singletons were removed (18 observations).
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
## NOTE: 5/0 fixed-effect singletons were removed (5 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 41/0 fixed-effect singletons were removed (41 observations).
## NOTE: 3/0 fixed-effect singletons were removed (3 observations).
## NOTE: 9/5 fixed-effect singletons were removed (14 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/6 fixed-effect singletons were removed (6 observations).
## NOTE: 6/14 fixed-effect singletons were removed (19 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 1/0 fixed-effect singleton was removed (1 observation).
## NOTE: 0/5 fixed-effect singletons were removed (5 observations).
## NOTE: 0/10 fixed-effect singletons were removed (10 observations).
## NOTE: 0/8 fixed-effect singletons were removed (8 observations).
# combine all summary metrics
summary_table <- stability %>%
  filter(tfr_coding == "level") %>%
  select(variable, cv_level = cv, sign_flip_level = sign_flips) %>%
  left_join(
    stability %>% filter(tfr_coding == "log") %>% select(variable, cv_log = cv, sign_flip_log = sign_flips),
    by = "variable"
  ) %>%
  left_join(
    biv_specs %>%
      filter(tfr_coding == "level", time_ctrl == "year FE") %>%
      group_by(variable) %>%
      summarise(mean_std_beta = mean(coef_std, na.rm = TRUE), .groups = "drop"),
    by = "variable"
  ) %>%
  left_join(
    wr2_results %>%
      pivot_wider(names_from = sample, values_from = wr2, names_prefix = "wr2_"),
    by = "variable"
  ) %>%
  left_join(
    het_results %>%
      group_by(variable) %>%
      summarise(mean_het = mean(slope_ratio, na.rm = TRUE), .groups = "drop"),
    by = "variable"
  ) %>%
  mutate(
    sign_flip = sign_flip_level | sign_flip_log,
    mean_cv = (cv_level + cv_log) / 2
  ) %>%
  select(variable, mean_std_beta, mean_cv, sign_flip,
         `wr2_All countries`, wr2_OECD, mean_het) %>%
  arrange(desc(abs(mean_std_beta)))

summary_table
# rank variables across metrics (higher = better)
summary_table <- summary_table %>%
  mutate(
    rank_beta = rank(-abs(mean_std_beta)),             # larger abs beta = better
    rank_cv = rank(mean_cv),                            # lower CV = better
    rank_wr2_all = rank(-replace_na(`wr2_All countries`, 0)),  # larger wr2 = better
    rank_wr2_oecd = rank(-replace_na(wr2_OECD, 0)),           # larger wr2 = better
    rank_het = rank(replace_na(mean_het, Inf)),         # lower heterogeneity = better
    mean_rank = (rank_beta + rank_cv + rank_wr2_all + rank_wr2_oecd + rank_het) / 5
  ) %>%
  arrange(mean_rank)

summary_table %>% select(variable, mean_std_beta, mean_cv, sign_flip, `wr2_All countries`, wr2_OECD, mean_het, mean_rank)
var_rank_order <- summary_table %>% arrange(desc(mean_rank)) %>% pull(variable)

summary_long <- summary_table %>%
  pivot_longer(c(mean_std_beta, mean_cv, `wr2_All countries`, wr2_OECD, mean_het),
               names_to = "metric", values_to = "value") %>%
  mutate(
    metric = recode(metric,
      mean_std_beta = "Std beta (mean)",
      mean_cv = "CV (mean)",
      `wr2_All countries` = "Within R² (World)",
      wr2_OECD = "Within R² (OECD)",
      mean_het = "Slope heterogeneity (τ/|β|)"
    ),
    metric = fct_relevel(metric, "Std beta (mean)", "CV (mean)", "Within R² (World)", "Within R² (OECD)", "Slope heterogeneity (τ/|β|)"),
    variable = factor(variable, levels = var_rank_order)
  )

summary_long %>%
  ggplot(aes(x = value, y = variable)) +
  geom_col() +
  facet_wrap(~metric, scales = "free_x", nrow = 1) +
  labs(y = NULL, x = NULL,
       title = "Summary: effect size, stability, and explanatory power") +
  theme(strip.text = element_text(size = 9))

ggsave("figs/summary_table_plot.png", width = 16, height = 9, dpi = 150)

Meta

#versions
write_sessioninfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] splines   stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lme4_1.1-37           Matrix_1.7-4          patchwork_1.3.2.9000 
##  [4] ggrepel_0.9.6         glue_1.8.0            fixest_0.13.2        
##  [7] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
## [10] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [13] lubridate_1.9.4       forcats_1.0.1         stringr_1.6.0        
## [16] dplyr_1.1.4           purrr_1.2.0           readr_2.1.5          
## [19] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.1.9000   
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] Rdpack_2.6.4        mnormt_2.1.1        gridExtra_2.3      
##  [4] sandwich_3.1-1      rlang_1.1.6.9000    multcomp_1.4-28    
##  [7] dreamerr_1.5.0      compiler_4.5.3      gdata_3.0.1        
## [10] systemfonts_1.3.1   vctrs_0.6.5         crayon_1.5.3       
## [13] pkgconfig_2.0.3     shape_1.4.6.1       fastmap_1.2.0      
## [16] backports_1.5.0     labeling_0.4.3      rmarkdown_2.30     
## [19] tzdb_0.5.0          nloptr_2.2.1        ragg_1.5.0         
## [22] bit_4.6.0           xfun_0.57           glmnet_4.1-10      
## [25] jomo_2.7-6          cachem_1.1.0        jsonlite_2.0.0     
## [28] stringmagic_1.2.0   pan_1.9             broom_1.0.11       
## [31] parallel_4.5.3      cluster_2.1.8.2     R6_2.6.1           
## [34] bslib_0.9.0         stringi_1.8.7       RColorBrewer_1.1-3 
## [37] boot_1.3-32         rpart_4.1.24        jquerylib_0.1.4    
## [40] numDeriv_2016.8-1.1 estimability_1.5.1  Rcpp_1.1.0         
## [43] iterators_1.0.14    knitr_1.50          zoo_1.8-14         
## [46] base64enc_0.1-3     nnet_7.3-20         timechange_0.3.0   
## [49] tidyselect_1.2.1    rstudioapi_0.17.1   yaml_2.3.10        
## [52] codetools_0.2-20    lattice_0.22-7      withr_3.0.2        
## [55] S7_0.2.1            coda_0.19-4.1       evaluate_1.0.5     
## [58] foreign_0.8-91      archive_1.1.12      survival_3.8-6     
## [61] pillar_1.11.1       mice_3.18.0         checkmate_2.3.3    
## [64] foreach_1.5.2       reformulas_0.4.1    generics_0.1.4     
## [67] vroom_1.6.6         hms_1.1.3           scales_1.4.0       
## [70] minqa_1.2.8         gtools_3.9.5        xtable_1.8-4       
## [73] emmeans_1.11.2-8    Hmisc_5.2-4         tools_4.5.3        
## [76] data.table_1.17.8   mvtnorm_1.3-3       grid_4.5.3         
## [79] rbibutils_2.3       colorspace_2.1-2    nlme_3.1-168       
## [82] htmlTable_2.4.3     Formula_1.2-5       cli_3.6.5          
## [85] textshaping_1.0.4   gtable_0.3.6        DEoptimR_1.1-4     
## [88] sass_0.4.10         digest_0.6.39       TH.data_1.1-4      
## [91] htmlwidgets_1.6.4   farver_2.1.2        htmltools_0.5.8.1  
## [94] lifecycle_1.0.4     mitml_0.4-5         bit64_4.6.0-1      
## [97] MASS_7.3-65
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)

  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))

  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")

  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
    conflicts = "overwrite"
    )
}