Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  ebpm,
  patchwork,
  rnaturalearth,
  sf,
  readxl,
  modelsummary
)

## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

summarize_participation = function(d) {
  totals = d %>% group_by(season) %>% summarize(n_games_total = n_distinct(year), .groups = "drop")
  d %>%
    group_by(iso3, country, season) %>%
    summarize(n_games = n(), .groups = "drop") %>%
    left_join(totals, by = "season") %>%
    mutate(frac_games = n_games / n_games_total)
}

add_per_capita = function(d, pop, by) {
  d %>%
    inner_join(pop, by = by) %>%
    mutate(
      gold_pc = gold / avg_pop,
      silver_pc = silver / avg_pop,
      bronze_pc = bronze / avg_pop,
      total_pc = total / avg_pop
    )
}

# empirical Bayes shrinkage via ebpm (Poisson means with Gamma prior)
add_eb = function(d) {
  d %>%
    mutate(
      gold_eb = ebpm_gamma(gold, avg_pop)$posterior$mean,
      silver_eb = ebpm_gamma(silver, avg_pop)$posterior$mean,
      bronze_eb = ebpm_gamma(bronze, avg_pop)$posterior$mean,
      total_eb = ebpm_gamma(total, avg_pop)$posterior$mean
    )
}

plot_map = function(d, iso3_col = "iso3", title = "") {
  map_data = world %>%
    filter(iso_a3_eh != "ATA") %>%
    left_join(d %>% select(iso3 = !!sym(iso3_col), total_eb), by = c("iso_a3_eh" = "iso3"))

  ggplot(map_data) +
    geom_sf(aes(fill = total_eb), color = "grey40", linewidth = 0.1) +
    scale_fill_viridis_c(
      name = "Medals per capita (EB)",
      na.value = "grey90",
      trans = "log10",
      limits = eb_range,
      labels = scales::label_scientific()
    ) +
    coord_sf(ylim = c(-55, 85), expand = FALSE) +
    labs(title = title) +
    theme_void(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "bottom",
      legend.key.width = unit(2, "cm"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(2, 2, 2, 2)
    )
}

plot_resid_map = function(d, iso3_col = "iso3", resid_col, title = "") {
  map_data = world %>%
    filter(iso_a3_eh != "ATA") %>%
    left_join(d %>% dplyr::select(iso3 = !!sym(iso3_col), resid = !!sym(resid_col)),
              by = c("iso_a3_eh" = "iso3"))

  ggplot(map_data) +
    geom_sf(aes(fill = resid), color = "grey40", linewidth = 0.1) +
    scale_fill_gradient2(
      name = "Residual\n(log scale)",
      na.value = "grey90",
      low = "#d73027", mid = "white", high = "#1a9850",
      midpoint = 0, limits = c(-5, 5), oob = scales::squish
    ) +
    coord_sf(ylim = c(-55, 85), expand = FALSE) +
    labs(title = title) +
    theme_void(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "bottom",
      legend.key.width = unit(2, "cm"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(2, 2, 2, 2)
    )
}

plot_raw_vs_eb = function(d, n = 30, title = "") {
  d %>%
    arrange(desc(total_eb)) %>%
    head(n) %>%
    mutate(country = fct_reorder(country, total_eb)) %>%
    select(country, Raw = total_pc, EB = total_eb) %>%
    pivot_longer(-country, names_to = "type", values_to = "rate") %>%
    ggplot(aes(rate, country, fill = type)) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(
      title = title,
      x = "Medals per capita",
      fill = "Estimate"
    )
}

Data

Medal tallies

# stable borders, all-time (1896-2024)
d_all_summer = read_csv("data/stable_borders_summer.csv")

## Rows: 116 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_all_winter = read_csv("data/stable_borders_winter.csv")

## Rows: 35 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_all_combined = read_csv("data/stable_borders_combined.csv")

## Rows: 119 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# post-Cold War (1994-2024)
d_post1990_summer = read_csv("data/post1994_summer.csv")

## Rows: 134 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_post1990_winter = read_csv("data/post1994_winter.csv")

## Rows: 49 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_post1990_combined = read_csv("data/post1994_combined.csv")

## Rows: 138 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): modern_iso3, country
## dbl (4): gold, silver, bronze, total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# participation: count and fraction of Games attended per country per season
d_all_participation = read_csv("data/stable_borders_participation.csv") %>% summarize_participation()

## Rows: 4028 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): iso3, country, season
## dbl (1): year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_post1990_participation = read_csv("data/post1994_participation.csv") %>%
  rename(iso3 = modern_iso3) %>%
  summarize_participation()

## Rows: 2569 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): modern_iso3, country, season
## dbl (1): year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Population

# UN World Population Prospects 2024 via OWID
d_un_pop = read_csv("data/un_pop_raw/population.csv") %>%
  rename(iso3 = Code, country = Entity, year = Year, pop = Population) %>%
  filter(!is.na(iso3), iso3 != "")

## Rows: 58824 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Population
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# average population for the full period (1896-2023)
d_un_pop_all = d_un_pop %>%
  filter(year >= 1896, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_pop = mean(pop, na.rm = TRUE), .groups = "drop")

# average population for the post-Cold War period (1994-2023)
d_un_pop_post1990 = d_un_pop %>%
  filter(year >= 1994, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_pop = mean(pop, na.rm = TRUE), .groups = "drop")

HDI

# UNDP HDI 1990-2023
d_hdi_raw <- read_csv("data/hdi_raw/HDR_composite_indices.csv")

## Rows: 206 Columns: 1112
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr    (4): iso3, country, hdicode, region
## dbl (1108): hdi_rank_2023, hdi_1990, hdi_1991, hdi_1992, hdi_1993, hdi_1994,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

d_hdi <- d_hdi_raw %>%
  dplyr::select(iso3, starts_with("hdi_19"), starts_with("hdi_20")) %>%
  pivot_longer(-iso3, names_to = "year", values_to = "hdi", names_prefix = "hdi_") %>%
  mutate(year = as.integer(year)) %>%
  filter(!is.na(hdi))

d_hdi_post1990 <- d_hdi %>%
  filter(year >= 1994, year <= 2023) %>%
  group_by(iso3) %>%
  summarize(avg_hdi = mean(hdi, na.rm = TRUE), .groups = "drop") %>%
  # impute missing countries (period averages estimated from current values)
  bind_rows(tibble(
    iso3 = c("TWN", "PRI", "BMU", "PRK"),
    avg_hdi = c(0.81, 0.81, 0.90, 0.56)
  ))

GDP per capita

# Maddison Project GDP per capita (2011 int-$), 1-2022
d_gdppc_raw <- read_xlsx("data/gdppc_raw/maddison_gdppc.xlsx", sheet = "GDPpc", skip = 2)
names(d_gdppc_raw)[1] <- "year"
d_gdppc <- d_gdppc_raw %>%
  pivot_longer(-year, names_to = "iso3", values_to = "gdppc") %>%
  filter(!is.na(gdppc), year >= 1896)

d_gdppc_all <- d_gdppc %>%
  group_by(iso3) %>%
  summarize(avg_gdppc = mean(gdppc, na.rm = TRUE), .groups = "drop")

d_gdppc_post1990 <- d_gdppc %>%
  filter(year >= 1994) %>%
  group_by(iso3) %>%
  summarize(avg_gdppc = mean(gdppc, na.rm = TRUE), .groups = "drop")

World shapefile

world <- ne_countries(scale = "medium", returnclass = "sf") %>%
  select(iso_a3_eh, geometry)

Analysis

Per capita rates

d_all_summer = add_per_capita(d_all_summer, d_un_pop_all, "iso3") %>% add_eb()
d_all_winter = add_per_capita(d_all_winter, d_un_pop_all, "iso3") %>% add_eb()
d_all_combined = add_per_capita(d_all_combined, d_un_pop_all, "iso3") %>% add_eb()

d_post1990_summer = add_per_capita(d_post1990_summer, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()
d_post1990_winter = add_per_capita(d_post1990_winter, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()
d_post1990_combined = add_per_capita(d_post1990_combined, d_un_pop_post1990, c("modern_iso3" = "iso3")) %>% add_eb()

Raw vs. EB rates

p1 <- plot_raw_vs_eb(d_all_summer, title = "Summer, all-time (stable borders)"); p1

p2 <- plot_raw_vs_eb(d_post1990_summer, title = "Summer, post-1994"); p2

p3 <- plot_raw_vs_eb(d_all_winter, title = "Winter, all-time (stable borders)"); p3

p4 <- plot_raw_vs_eb(d_post1990_winter, title = "Winter, post-1994"); p4

p_combined <- (p1 + p2) / (p3 + p4) + plot_layout(guides = "collect")
p_combined

GG_save("figures/raw_vs_eb_summer_alltime.png", p1)
GG_save("figures/raw_vs_eb_summer_post1994.png", p2)
GG_save("figures/raw_vs_eb_winter_alltime.png", p3)
GG_save("figures/raw_vs_eb_winter_post1994.png", p4)
GG_save("figures/raw_vs_eb_2x2.png", p_combined, width = 16, height = 14)

World maps

eb_range <- range(c(d_all_summer$total_eb, d_post1990_summer$total_eb,
                    d_all_winter$total_eb, d_post1990_winter$total_eb))

m1 <- plot_map(d_all_summer, title = "Summer, all-time (stable borders)"); m1

m2 <- plot_map(d_post1990_summer, iso3_col = "modern_iso3", title = "Summer, post-1994"); m2

m3 <- plot_map(d_all_winter, title = "Winter, all-time (stable borders)"); m3

m4 <- plot_map(d_post1990_winter, iso3_col = "modern_iso3", title = "Winter, post-1994"); m4

m_combined <- (m1 + m2) / (m3 + m4) + plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
m_combined

GG_save("figures/map_summer_alltime.png", m1)
GG_save("figures/map_summer_post1994.png", m2)
GG_save("figures/map_winter_alltime.png", m3)
GG_save("figures/map_winter_post1994.png", m4)
GG_save("figures/map_2x2.png", m_combined, width = 16, height = 10)

Post-1994 Summer models

d_reg_post <- d_post1990_summer %>%
  left_join(
    d_post1990_participation %>% filter(season == "Summer"),
    by = c("modern_iso3" = "iso3")
  ) %>%
  left_join(d_hdi_post1990, by = c("modern_iso3" = "iso3")) %>%
  left_join(d_gdppc_post1990, by = c("modern_iso3" = "iso3")) %>%
  dplyr::select(modern_iso3, country = country.x, total,
                avg_pop, frac_games, avg_hdi, avg_gdppc) %>%
  mutate(log_gdppc = log(avg_gdppc)) %>%
  filter(!is.na(avg_hdi), !is.na(avg_gdppc))

# negative binomial models with offset(log(pop))
m_post_part <- MASS::glm.nb(total ~ frac_games + offset(log(avg_pop)), data = d_reg_post)
m_post_hdi  <- MASS::glm.nb(total ~ avg_hdi + offset(log(avg_pop)), data = d_reg_post)
m_post_gdp  <- MASS::glm.nb(total ~ log_gdppc + offset(log(avg_pop)), data = d_reg_post)
m_post_ph   <- MASS::glm.nb(total ~ frac_games + avg_hdi + offset(log(avg_pop)), data = d_reg_post)
m_post_pg   <- MASS::glm.nb(total ~ frac_games + log_gdppc + offset(log(avg_pop)), data = d_reg_post)

models_post <- list(
  "Participation" = m_post_part,
  "HDI" = m_post_hdi,
  "log(GDPpc)" = m_post_gdp,
  "Part + HDI" = m_post_ph,
  "Part + log(GDPpc)" = m_post_pg
)

map2_dfr(models_post, names(models_post), ~ {
  s <- summary(.x)$coefficients
  tibble(term = rownames(s), est = s[,1], se = s[,2], z = s[,3], p = s[,4], model = .y)
}) %>% filter(term != "(Intercept)") %>%
  mutate(sig = case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", p < 0.1 ~ ".", TRUE ~ "")) %>%
  dplyr::select(model, term, est, se, z, p, sig)

tibble(
  model = names(models_post),
  AIC = sapply(models_post, AIC),
  theta = sapply(models_post, function(m) m$theta)
)

# standardized coefficients for relative importance
d_reg_post_z <- d_reg_post %>%
  mutate(across(c(frac_games, avg_hdi, log_gdppc), ~ scale(.)[,1], .names = "{.col}_z"))

m_post_ph_z <- MASS::glm.nb(total ~ frac_games_z + avg_hdi_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_pg_z <- MASS::glm.nb(total ~ frac_games_z + log_gdppc_z + offset(log(avg_pop)), data = d_reg_post_z)

coef(summary(m_post_ph_z))[2:3, ]

##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z   0.0483      0.116   0.416 6.78e-01
## avg_hdi_z      1.2662      0.118  10.727 7.57e-27

coef(summary(m_post_pg_z))[2:3, ]

##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z   0.0882      0.126   0.699 4.84e-01
## log_gdppc_z    0.9962      0.125   7.980 1.46e-15

All-time Summer models

d_reg_all <- d_all_summer %>%
  left_join(
    d_all_participation %>% filter(season == "Summer"),
    by = "iso3"
  ) %>%
  left_join(d_gdppc_all, by = "iso3") %>%
  dplyr::select(iso3, country = country.x, total, avg_pop, frac_games, avg_gdppc) %>%
  mutate(log_gdppc = log(avg_gdppc)) %>%
  filter(!is.na(avg_gdppc))

m_all_part <- MASS::glm.nb(total ~ frac_games + offset(log(avg_pop)), data = d_reg_all)
m_all_gdp  <- MASS::glm.nb(total ~ log_gdppc + offset(log(avg_pop)), data = d_reg_all)
m_all_pg   <- MASS::glm.nb(total ~ frac_games + log_gdppc + offset(log(avg_pop)), data = d_reg_all)

models_all <- list(
  "Participation" = m_all_part,
  "log(GDPpc)" = m_all_gdp,
  "Part + log(GDPpc)" = m_all_pg
)

map2_dfr(models_all, names(models_all), ~ {
  s <- summary(.x)$coefficients
  tibble(term = rownames(s), est = s[,1], se = s[,2], z = s[,3], p = s[,4], model = .y)
}) %>% filter(term != "(Intercept)") %>%
  mutate(sig = case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", p < 0.1 ~ ".", TRUE ~ "")) %>%
  dplyr::select(model, term, est, se, z, p, sig)

tibble(
  model = names(models_all),
  AIC = sapply(models_all, AIC),
  theta = sapply(models_all, function(m) m$theta)
)

# standardized coefficients for relative importance
d_reg_all_z <- d_reg_all %>%
  mutate(across(c(frac_games, log_gdppc), ~ scale(.)[,1], .names = "{.col}_z"))

m_all_pg_z <- MASS::glm.nb(total ~ frac_games_z + log_gdppc_z + offset(log(avg_pop)), data = d_reg_all_z)

coef(summary(m_all_pg_z))[2:3, ]

##              Estimate Std. Error z value Pr(>|z|)
## frac_games_z    0.776      0.139    5.59 2.22e-08
## log_gdppc_z     0.692      0.134    5.18 2.22e-07

Model comparison (standardized betas)

# refit all models with standardized predictors
m_post_part_z <- MASS::glm.nb(total ~ frac_games_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_hdi_z  <- MASS::glm.nb(total ~ avg_hdi_z + offset(log(avg_pop)), data = d_reg_post_z)
m_post_gdp_z  <- MASS::glm.nb(total ~ log_gdppc_z + offset(log(avg_pop)), data = d_reg_post_z)

m_all_part_z <- MASS::glm.nb(total ~ frac_games_z + offset(log(avg_pop)), data = d_reg_all_z)
m_all_gdp_z  <- MASS::glm.nb(total ~ log_gdppc_z + offset(log(avg_pop)), data = d_reg_all_z)

all_models <- list(
  "Post-94: Part" = m_post_part_z,
  "Post-94: HDI" = m_post_hdi_z,
  "Post-94: GDPpc" = m_post_gdp_z,
  "Post-94: Part+HDI" = m_post_ph_z,
  "Post-94: Part+GDPpc" = m_post_pg_z,
  "All: Part" = m_all_part_z,
  "All: GDPpc" = m_all_gdp_z,
  "All: Part+GDPpc" = m_all_pg_z
)

# McFadden pseudo R²
m_post_null <- MASS::glm.nb(total ~ 1 + offset(log(avg_pop)), data = d_reg_post_z)
m_all_null  <- MASS::glm.nb(total ~ 1 + offset(log(avg_pop)), data = d_reg_all_z)
null_models <- c(rep(list(m_post_null), 5), rep(list(m_all_null), 3))

pseudo_r2 <- function(model, null_model) 1 - as.numeric(logLik(model)) / as.numeric(logLik(null_model))
r2_values <- map2_chr(all_models, null_models, ~ sprintf("%.3f", pseudo_r2(.x, .y)))
aic_values <- map_chr(all_models, ~ sprintf("%.1f", AIC(.x)))

extra_rows <- data.frame(matrix(c("AIC", aic_values, "McFadden R²", r2_values), nrow = 2, byrow = TRUE))
names(extra_rows) <- c(" ", names(all_models))

modelsummary(
  all_models,
  coef_map = c("frac_games_z" = "Participation", "avg_hdi_z" = "HDI", "log_gdppc_z" = "log(GDPpc)"),
  statistic = "({std.error})",
  stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
  gof_map = c("nobs"),
  add_rows = extra_rows
)

	Post-94: Part	Post-94: HDI	Post-94: GDPpc	Post-94: Part+HDI	Post-94: Part+GDPpc	All: Part	All: GDPpc	All: Part+GDPpc
* p < 0.05, p < 0.01, * p < 0.001
Participation	0.214			0.048	0.088	1.035***		0.776***
	(0.132)			(0.116)	(0.126)	(0.133)		(0.139)
HDI		1.281***		1.266***
		(0.110)		(0.118)
log(GDPpc)			1.019***		0.996***		1.219***	0.692***
			(0.118)		(0.125)		(0.130)	(0.134)
Num.Obs.	125	125	125	125	125	105	105	105
AIC	1309.0	1237.7	1269.9	1239.5	1271.3	1136.4	1139.0	1119.0
McFadden R²	0.003	0.057	0.032	0.057	0.033	0.045	0.043	0.061

Model residuals

# post-1994: residuals from HDI model (best)
d_reg_post <- d_reg_post %>%
  mutate(
    fitted_hdi = fitted(m_post_hdi),
    resid_hdi = log(total) - log(fitted_hdi)
  )

d_reg_post %>% arrange(desc(resid_hdi)) %>%
  dplyr::select(modern_iso3, country, total, fitted_hdi, resid_hdi) %>% head(10)

d_reg_post %>% arrange(resid_hdi) %>%
  dplyr::select(modern_iso3, country, total, fitted_hdi, resid_hdi) %>% head(10)

# all-time: residuals from Part + log(GDPpc) model (best)
d_reg_all <- d_reg_all %>%
  mutate(
    fitted_pg = fitted(m_all_pg),
    resid_pg = log(total) - log(fitted_pg)
  )

d_reg_all %>% arrange(desc(resid_pg)) %>%
  dplyr::select(iso3, country, total, fitted_pg, resid_pg) %>% head(10)

d_reg_all %>% arrange(resid_pg) %>%
  dplyr::select(iso3, country, total, fitted_pg, resid_pg) %>% head(10)

r1 <- plot_resid_map(d_reg_post, "modern_iso3", "resid_hdi",
                     title = "Post-1994 Summer residuals (HDI model)"); r1

r2 <- plot_resid_map(d_reg_all, "iso3", "resid_pg",
                     title = "All-time Summer residuals (Part + GDPpc model)"); r2

r_combined <- r1 / r2 + plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
r_combined

GG_save("figures/resid_map_post1994_hdi.png", r1)
GG_save("figures/resid_map_alltime_pg.png", r2)
GG_save("figures/resid_map_combined.png", r_combined, width = 12, height = 12)

Olympics: who is good?

26 February, 2026

Init

Functions

Data

Medal tallies

Population

HDI

GDP per capita

World shapefile

Analysis

Per capita rates

Raw vs. EB rates

World maps

Post-1994 Summer models

All-time Summer models

Model comparison (standardized betas)

Model residuals

Meta