Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  ebpm,
  patchwork,
  rnaturalearth,
  sf,
  readxl,
  modelsummary
)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD
theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

summarize_participation = function(d) {
  totals = d %>% group_by(season) %>% summarize(n_games_total = n_distinct(year), .groups = "drop")
  d %>%
    group_by(iso3, country, season) %>%
    summarize(n_games = n(), .groups = "drop") %>%
    left_join(totals, by = "season") %>%
    mutate(frac_games = n_games / n_games_total)
}

add_per_capita = function(d, pop, by) {
  d %>%
    inner_join(pop, by = by) %>%
    mutate(
      gold_pc = gold / avg_pop,
      silver_pc = silver / avg_pop,
      bronze_pc = bronze / avg_pop,
      total_pc = total / avg_pop
    )
}

# empirical Bayes shrinkage via ebpm (Poisson means with Gamma prior)
add_eb = function(d) {
  d %>%
    mutate(
      gold_eb = ebpm_gamma(gold, avg_pop)$posterior$mean,
      silver_eb = ebpm_gamma(silver, avg_pop)$posterior$mean,
      bronze_eb = ebpm_gamma(bronze, avg_pop)$posterior$mean,
      total_eb = ebpm_gamma(total, avg_pop)$posterior$mean
    )
}

plot_map = function(d, iso3_col = "iso3", title = "") {
  map_data = world %>%
    filter(iso_a3_eh != "ATA") %>%
    left_join(d %>% select(iso3 = !!sym(iso3_col), total_eb), by = c("iso_a3_eh" = "iso3"))

  ggplot(map_data) +
    geom_sf(aes(fill = total_eb), color = "grey40", linewidth = 0.1) +
    scale_fill_viridis_c(
      name = "Medals per capita (EB)",
      na.value = "grey90",
      trans = "log10",
      limits = eb_range,
      labels = scales::label_scientific()
    ) +
    coord_sf(ylim = c(-55, 85), expand = FALSE) +
    labs(title = title) +
    theme_void(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "bottom",
      legend.key.width = unit(2, "cm"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(2, 2, 2, 2)
    )
}

plot_resid_map = function(d, iso3_col = "iso3", resid_col, title = "") {
  map_data = world %>%
    filter(iso_a3_eh != "ATA") %>%
    left_join(d %>% dplyr::select(iso3 = !!sym(iso3_col), resid = !!sym(resid_col)),
              by = c("iso_a3_eh" = "iso3"))

  ggplot(map_data) +
    geom_sf(aes(fill = resid), color = "grey40", linewidth = 0.1) +
    scale_fill_gradient2(
      name = "Residual\n(log scale)",
      na.value = "grey90",
      low = "#d73027", mid = "white", high = "#1a9850",
      midpoint = 0, limits = c(-5, 5), oob = scales::squish
    ) +
    coord_sf(ylim = c(-55, 85), expand = FALSE) +
    labs(title = title) +
    theme_void(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "bottom",
      legend.key.width = unit(2, "cm"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(2, 2, 2, 2)
    )
}

plot_raw_vs_eb = function(d, n = 30, title = "") {
  d %>%
    arrange(desc(total_eb)) %>%
    head(n) %>%
    mutate(country = fct_reorder(country, total_eb)) %>%
    select(country, Raw = total_pc, EB = total_eb) %>%
    pivot_longer(-country, names_to = "type", values_to = "rate") %>%
    ggplot(aes(rate, country, fill = type)) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(
      title = title,
      x = "Medals per capita",
      fill = "Estimate"
    )
}

Data

Medal tallies

# stable borders, all-time (1896-2024)
d_all_summer = read_csv("data/stable_borders_summer.csv")
## Rows: 116 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_all_winter = read_csv("data/stable_borders_winter.csv")
## Rows: 35 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_all_combined = read_csv("data/stable_borders_combined.csv")
## Rows: 119 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# post-Cold War (1994-2024)
d_post1990_summer = read_csv("data/post1994_summer.csv")
## Rows: 134 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_post1990_winter = read_csv("data/post1994_winter.csv")
## Rows: 49 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_post1990_combined = read_csv("data/post1994_combined.csv")
## Rows: 138 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# participation: count and fraction of Games attended per country per season
d_all_participation = read_csv("data/stable_borders_participation.csv") %>% summarize_participation()
## Rows: 4028 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): iso3, country, season
## dbl (1): year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_post1990_participation = read_csv("data/post1994_participation.csv") %>%
  rename(iso3 = modern_iso3) %>%
  summarize_participation()
## Rows: 2569 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): modern_iso3, country, season
## dbl (1): year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Population

# UN World Population Prospects 2024 via OWID
d_un_pop = read_csv("data/un_pop_raw/population.csv") %>%
  rename(iso3 = Code, country = Entity, year = Year, pop = Population) %>%
  filter(!is.na(iso3), iso3 != "")
## Rows: 58824 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Population
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# average population for the full period (1896-2023)
d_un_pop_all = d_un_pop %>%
  filter(year >= 1896, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_pop = mean(pop, na.rm = TRUE), .groups = "drop")

# average population for the post-Cold War period (1994-2023)
d_un_pop_post1990 = d_un_pop %>%
  filter(year >= 1994, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_pop = mean(pop, na.rm = TRUE), .groups = "drop")

HDI

# UNDP HDI 1990-2023
d_hdi_raw <- read_csv("data/hdi_raw/HDR_composite_indices.csv")
## Rows: 206 Columns: 1112
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (4): iso3, country, hdicode, region
## dbl (1108): hdi_rank_2023, hdi_1990, hdi_1991, hdi_1992, hdi_1993, hdi_1994,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_hdi <- d_hdi_raw %>%
  dplyr::select(iso3, starts_with("hdi_19"), starts_with("hdi_20")) %>%
  pivot_longer(-iso3, names_to = "year", values_to = "hdi", names_prefix = "hdi_") %>%
  mutate(year = as.integer(year)) %>%
  filter(!is.na(hdi))

d_hdi_post1990 <- d_hdi %>%
  filter(year >= 1994, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_hdi = mean(hdi, na.rm = TRUE), .groups = "drop") %>%
  # impute missing countries (period averages estimated from current values)
  bind_rows(tibble(
    iso3 = c("TWN", "PRI", "BMU", "PRK"),
    avg_hdi = c(0.81, 0.81, 0.90, 0.56)
  ))

GDP per capita

# Maddison Project GDP per capita (2011 int-$), 1-2022
d_gdppc_raw <- read_xlsx("data/gdppc_raw/maddison_gdppc.xlsx", sheet = "GDPpc", skip = 2)
names(d_gdppc_raw)[1] <- "year"
d_gdppc <- d_gdppc_raw %>%
  pivot_longer(-year, names_to = "iso3", values_to = "gdppc") %>%
  filter(!is.na(gdppc), year >= 1896)

d_gdppc_all <- d_gdppc %>%
  group_by(iso3) %>%
  summarize(avg_gdppc = mean(gdppc, na.rm = TRUE), .groups = "drop")

d_gdppc_post1990 <- d_gdppc %>%
  filter(year >= 1994) %>%
  group_by(iso3) %>%
  summarize(avg_gdppc = mean(gdppc, na.rm = TRUE), .groups = "drop")

World shapefile

world <- ne_countries(scale = "medium", returnclass = "sf") %>%
  select(iso_a3_eh, geometry)

Analysis

Per capita rates

d_all_summer = add_per_capita(d_all_summer, d_un_pop_all, "iso3") %>% add_eb()
d_all_winter = add_per_capita(d_all_winter, d_un_pop_all, "iso3") %>% add_eb()
d_all_combined = add_per_capita(d_all_combined, d_un_pop_all, "iso3") %>% add_eb()

d_post1990_summer = add_per_capita(d_post1990_summer, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()
d_post1990_winter = add_per_capita(d_post1990_winter, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()
d_post1990_combined = add_per_capita(d_post1990_combined, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()

Raw vs. EB rates

p1 <- plot_raw_vs_eb(d_all_summer, title = "Summer, all-time (stable borders)"); p1

p2 <- plot_raw_vs_eb(d_post1990_summer, title = "Summer, post-1994"); p2

p3 <- plot_raw_vs_eb(d_all_winter, title = "Winter, all-time (stable borders)"); p3

p4 <- plot_raw_vs_eb(d_post1990_winter, title = "Winter, post-1994"); p4

p_combined <- (p1 + p2) / (p3 + p4) + plot_layout(guides = "collect")
p_combined

GG_save("figures/raw_vs_eb_summer_alltime.png", p1)
GG_save("figures/raw_vs_eb_summer_post1994.png", p2)
GG_save("figures/raw_vs_eb_winter_alltime.png", p3)
GG_save("figures/raw_vs_eb_winter_post1994.png", p4)
GG_save("figures/raw_vs_eb_2x2.png", p_combined, width = 16, height = 14)

World maps

eb_range <- range(c(d_all_summer$total_eb, d_post1990_summer$total_eb,
                    d_all_winter$total_eb, d_post1990_winter$total_eb))

m1 <- plot_map(d_all_summer, title = "Summer, all-time (stable borders)"); m1

m2 <- plot_map(d_post1990_summer, iso3_col = "modern_iso3", title = "Summer, post-1994"); m2

m3 <- plot_map(d_all_winter, title = "Winter, all-time (stable borders)"); m3

m4 <- plot_map(d_post1990_winter, iso3_col = "modern_iso3", title = "Winter, post-1994"); m4

m_combined <- (m1 + m2) / (m3 + m4) + plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
m_combined

GG_save("figures/map_summer_alltime.png", m1)
GG_save("figures/map_summer_post1994.png", m2)
GG_save("figures/map_winter_alltime.png", m3)
GG_save("figures/map_winter_post1994.png", m4)
GG_save("figures/map_2x2.png", m_combined, width = 16, height = 10)

Post-1994 Summer models

d_reg_post <- d_post1990_summer %>%
  left_join(
    d_post1990_participation %>% filter(season == "Summer"),
    by = c("modern_iso3" = "iso3")
  ) %>%
  left_join(d_hdi_post1990, by = c("modern_iso3" = "iso3")) %>%
  left_join(d_gdppc_post1990, by = c("modern_iso3" = "iso3")) %>%
  dplyr::select(modern_iso3, country = country.x, total,
                avg_pop, frac_games, avg_hdi, avg_gdppc) %>%
  mutate(log_gdppc = log(avg_gdppc)) %>%
  filter(!is.na(avg_hdi), !is.na(avg_gdppc))

# negative binomial models with offset(log(pop))
m_post_part <- MASS::glm.nb(total ~ frac_games + offset(log(avg_pop)), data = d_reg_post)
m_post_hdi  <- MASS::glm.nb(total ~ avg_hdi + offset(log(avg_pop)), data = d_reg_post)
m_post_gdp  <- MASS::glm.nb(total ~ log_gdppc + offset(log(avg_pop)), data = d_reg_post)
m_post_ph   <- MASS::glm.nb(total ~ frac_games + avg_hdi + offset(log(avg_pop)), data = d_reg_post)
m_post_pg   <- MASS::glm.nb(total ~ frac_games + log_gdppc + offset(log(avg_pop)), data = d_reg_post)

models_post <- list(
  "Participation" = m_post_part,
  "HDI" = m_post_hdi,
  "log(GDPpc)" = m_post_gdp,
  "Part + HDI" = m_post_ph,
  "Part + log(GDPpc)" = m_post_pg
)

map2_dfr(models_post, names(models_post), ~ {
  s <- summary(.x)$coefficients
  tibble(term = rownames(s), est = s[,1], se = s[,2], z = s[,3], p = s[,4], model = .y)
}) %>% filter(term != "(Intercept)") %>%
  mutate(sig = case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", p < 0.1 ~ ".", TRUE ~ "")) %>%
  dplyr::select(model, term, est, se, z, p, sig)
tibble(
  model = names(models_post),
  AIC = sapply(models_post, AIC),
  theta = sapply(models_post, function(m) m$theta)
)
# standardized coefficients for relative importance
d_reg_post_z <- d_reg_post %>%
  mutate(across(c(frac_games, avg_hdi, log_gdppc), ~ scale(.)[,1], .names = "{.col}_z"))

m_post_ph_z <- MASS::glm.nb(total ~ frac_games_z + avg_hdi_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_pg_z <- MASS::glm.nb(total ~ frac_games_z + log_gdppc_z + offset(log(avg_pop)), data = d_reg_post_z)

coef(summary(m_post_ph_z))[2:3, ]
##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z   0.0483      0.116   0.416 6.78e-01
## avg_hdi_z      1.2662      0.118  10.727 7.57e-27
coef(summary(m_post_pg_z))[2:3, ]
##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z   0.0882      0.126   0.699 4.84e-01
## log_gdppc_z    0.9962      0.125   7.980 1.46e-15

All-time Summer models

d_reg_all <- d_all_summer %>%
  left_join(
    d_all_participation %>% filter(season == "Summer"),
    by = "iso3"
  ) %>%
  left_join(d_gdppc_all, by = "iso3") %>%
  dplyr::select(iso3, country = country.x, total, avg_pop, frac_games, avg_gdppc) %>%
  mutate(log_gdppc = log(avg_gdppc)) %>%
  filter(!is.na(avg_gdppc))

m_all_part <- MASS::glm.nb(total ~ frac_games + offset(log(avg_pop)), data = d_reg_all)
m_all_gdp  <- MASS::glm.nb(total ~ log_gdppc + offset(log(avg_pop)), data = d_reg_all)
m_all_pg   <- MASS::glm.nb(total ~ frac_games + log_gdppc + offset(log(avg_pop)), data = d_reg_all)

models_all <- list(
  "Participation" = m_all_part,
  "log(GDPpc)" = m_all_gdp,
  "Part + log(GDPpc)" = m_all_pg
)

map2_dfr(models_all, names(models_all), ~ {
  s <- summary(.x)$coefficients
  tibble(term = rownames(s), est = s[,1], se = s[,2], z = s[,3], p = s[,4], model = .y)
}) %>% filter(term != "(Intercept)") %>%
  mutate(sig = case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", p < 0.1 ~ ".", TRUE ~ "")) %>%
  dplyr::select(model, term, est, se, z, p, sig)
tibble(
  model = names(models_all),
  AIC = sapply(models_all, AIC),
  theta = sapply(models_all, function(m) m$theta)
)
# standardized coefficients for relative importance
d_reg_all_z <- d_reg_all %>%
  mutate(across(c(frac_games, log_gdppc), ~ scale(.)[,1], .names = "{.col}_z"))

m_all_pg_z <- MASS::glm.nb(total ~ frac_games_z + log_gdppc_z + offset(log(avg_pop)), data = d_reg_all_z)

coef(summary(m_all_pg_z))[2:3, ]
##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z    0.776      0.139    5.59 2.22e-08
## log_gdppc_z     0.692      0.134    5.18 2.22e-07

Model comparison (standardized betas)

# refit all models with standardized predictors
m_post_part_z <- MASS::glm.nb(total ~ frac_games_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_hdi_z  <- MASS::glm.nb(total ~ avg_hdi_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_gdp_z  <- MASS::glm.nb(total ~ log_gdppc_z + offset(log(avg_pop)), data = d_reg_post_z)

m_all_part_z <- MASS::glm.nb(total ~ frac_games_z + offset(log(avg_pop)), data = d_reg_all_z)
m_all_gdp_z  <- MASS::glm.nb(total ~ log_gdppc_z + offset(log(avg_pop)), data = d_reg_all_z)

all_models <- list(
  "Post-94: Part" = m_post_part_z,
  "Post-94: HDI" = m_post_hdi_z,
  "Post-94: GDPpc" = m_post_gdp_z,
  "Post-94: Part+HDI" = m_post_ph_z,
  "Post-94: Part+GDPpc" = m_post_pg_z,
  "All: Part" = m_all_part_z,
  "All: GDPpc" = m_all_gdp_z,
  "All: Part+GDPpc" = m_all_pg_z
)

# McFadden pseudo R²
m_post_null <- MASS::glm.nb(total ~ 1 + offset(log(avg_pop)), data = d_reg_post_z)
m_all_null  <- MASS::glm.nb(total ~ 1 + offset(log(avg_pop)), data = d_reg_all_z)
null_models <- c(rep(list(m_post_null), 5), rep(list(m_all_null), 3))

pseudo_r2 <- function(model, null_model) 1 - as.numeric(logLik(model)) / as.numeric(logLik(null_model))
r2_values <- map2_chr(all_models, null_models, ~ sprintf("%.3f", pseudo_r2(.x, .y)))
aic_values <- map_chr(all_models, ~ sprintf("%.1f", AIC(.x)))

extra_rows <- data.frame(matrix(c("AIC", aic_values, "McFadden R²", r2_values), nrow = 2, byrow = TRUE))
names(extra_rows) <- c(" ", names(all_models))

modelsummary(
  all_models,
  coef_map = c("frac_games_z" = "Participation", "avg_hdi_z" = "HDI", "log_gdppc_z" = "log(GDPpc)"),
  statistic = "({std.error})",
  stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
  gof_map = c("nobs"),
  add_rows = extra_rows
)
Post-94: Part Post-94: HDI Post-94: GDPpc Post-94: Part+HDI Post-94: Part+GDPpc All: Part All: GDPpc All: Part+GDPpc
* p < 0.05, ** p < 0.01, *** p < 0.001
Participation 0.214 0.048 0.088 1.035*** 0.776***
(0.132) (0.116) (0.126) (0.133) (0.139)
HDI 1.281*** 1.266***
(0.110) (0.118)
log(GDPpc) 1.019*** 0.996*** 1.219*** 0.692***
(0.118) (0.125) (0.130) (0.134)
Num.Obs. 125 125 125 125 125 105 105 105
AIC 1309.0 1237.7 1269.9 1239.5 1271.3 1136.4 1139.0 1119.0
McFadden R² 0.003 0.057 0.032 0.057 0.033 0.045 0.043 0.061

Model residuals

# post-1994: residuals from HDI model (best)
d_reg_post <- d_reg_post %>%
  mutate(
    fitted_hdi = fitted(m_post_hdi),
    resid_hdi = log(total) - log(fitted_hdi)
  )

d_reg_post %>% arrange(desc(resid_hdi)) %>%
  dplyr::select(modern_iso3, country, total, fitted_hdi, resid_hdi) %>% head(10)
d_reg_post %>% arrange(resid_hdi) %>%
  dplyr::select(modern_iso3, country, total, fitted_hdi, resid_hdi) %>% head(10)
# all-time: residuals from Part + log(GDPpc) model (best)
d_reg_all <- d_reg_all %>%
  mutate(
    fitted_pg = fitted(m_all_pg),
    resid_pg = log(total) - log(fitted_pg)
  )

d_reg_all %>% arrange(desc(resid_pg)) %>%
  dplyr::select(iso3, country, total, fitted_pg, resid_pg) %>% head(10)
d_reg_all %>% arrange(resid_pg) %>%
  dplyr::select(iso3, country, total, fitted_pg, resid_pg) %>% head(10)
r1 <- plot_resid_map(d_reg_post, "modern_iso3", "resid_hdi",
                     title = "Post-1994 Summer residuals (HDI model)"); r1

r2 <- plot_resid_map(d_reg_all, "iso3", "resid_pg",
                     title = "All-time Summer residuals (Part + GDPpc model)"); r2

r_combined <- r1 / r2 + plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
r_combined

GG_save("figures/resid_map_post1994_hdi.png", r1)
GG_save("figures/resid_map_alltime_pg.png", r2)
GG_save("figures/resid_map_combined.png", r_combined, width = 12, height = 12)

Meta

#versions
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] modelsummary_2.5.0    readxl_1.4.5          sf_1.0-21            
##  [4] rnaturalearth_1.1.0   patchwork_1.3.2.9000  ebpm_0.0.1.3         
##  [7] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
## [10] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [13] lubridate_1.9.4       forcats_1.0.1         stringr_1.6.0        
## [16] dplyr_1.1.4           purrr_1.2.0           readr_2.1.5          
## [19] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.1.9000   
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.17.1       jsonlite_2.0.0         
##   [4] shape_1.4.6.1           datawizard_1.3.0        TH.data_1.1-4          
##   [7] estimability_1.5.1      jomo_2.7-6              farver_2.1.2           
##  [10] nloptr_2.2.1            rmarkdown_2.30          ragg_1.5.0             
##  [13] vctrs_0.6.5             minqa_1.2.8             effectsize_1.0.1       
##  [16] base64enc_0.1-3         mixsqp_0.3-54           htmltools_0.5.8.1      
##  [19] broom_1.0.11            cellranger_1.1.0        Formula_1.2-5          
##  [22] mitml_0.4-5             sass_0.4.10             parallelly_1.45.1      
##  [25] KernSmooth_2.23-26      bslib_0.9.0             htmlwidgets_1.6.4      
##  [28] sandwich_3.1-1          zoo_1.8-14              emmeans_1.11.2-8       
##  [31] cachem_1.1.0            lifecycle_1.0.4         iterators_1.0.14       
##  [34] pkgconfig_2.0.3         Matrix_1.7-4            R6_2.6.1               
##  [37] fastmap_1.2.0           rbibutils_2.3           future_1.67.0          
##  [40] digest_0.6.39           colorspace_2.1-2        irlba_2.3.5.1          
##  [43] textshaping_1.0.4       Hmisc_5.2-4             labeling_0.4.3         
##  [46] tinytable_0.14.0        timechange_0.3.0        gdata_3.0.1            
##  [49] compiler_4.5.2          proxy_0.4-27            bit64_4.6.0-1          
##  [52] withr_3.0.2             htmlTable_2.4.3         S7_0.2.1               
##  [55] backports_1.5.0         DBI_1.2.3               performance_0.15.2     
##  [58] pan_1.9                 MASS_7.3-65             classInt_0.4-11        
##  [61] gtools_3.9.5            tools_4.5.2             units_1.0-0            
##  [64] lmtest_0.9-40           foreign_0.8-91          future.apply_1.20.0    
##  [67] nnet_7.3-20             glue_1.8.0              rnaturalearthdata_1.0.0
##  [70] nlme_3.1-168            grid_4.5.2              checkmate_2.3.3        
##  [73] cluster_2.1.8.2         generics_0.1.4          gtable_0.3.6           
##  [76] tzdb_0.5.0              class_7.3-23            data.table_1.17.8      
##  [79] hms_1.1.3               tables_0.9.31           foreach_1.5.2          
##  [82] pillar_1.11.1           vroom_1.6.6             splines_4.5.2          
##  [85] lattice_0.22-7          survival_3.8-6          bit_4.6.0              
##  [88] tidyselect_1.2.1        knitr_1.50              reformulas_0.4.1       
##  [91] gridExtra_2.3           xfun_0.53               matrixStats_1.5.0      
##  [94] DEoptimR_1.1-4          stringi_1.8.7           yaml_2.3.10            
##  [97] boot_1.3-32             evaluate_1.0.5          codetools_0.2-20       
## [100] archive_1.1.12          cli_3.6.5               rpart_4.1.24           
## [103] xtable_1.8-4            parameters_0.28.2       systemfonts_1.3.1      
## [106] Rdpack_2.6.4            jquerylib_0.1.4         Rcpp_1.1.0             
## [109] globals_0.18.0          coda_0.19-4.1           parallel_4.5.2         
## [112] bayestestR_0.17.0       lme4_1.1-37             listenv_0.9.1          
## [115] glmnet_4.1-10           mvtnorm_1.3-3           viridisLite_0.4.2      
## [118] scales_1.4.0            e1071_1.7-16            insight_1.4.2          
## [121] crayon_1.5.3            rlang_1.1.6.9000        multcomp_1.4-28        
## [124] mnormt_2.1.1            mice_3.18.0
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)

  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))

  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")

  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
    conflicts = "overwrite"
    )
}