Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  sf,
  googlesheets4,
  rnaturalearth
)

## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

insert_row_values <- function(df, values, at = 2, col_start = 1) {
  stopifnot(is.data.frame(df))

  n_vals  <- length(values)
  col_end <- col_start + n_vals - 1

  if (col_end > ncol(df)) {
    stop("Values exceed number of columns available from col_start.")
  }

  # Create a 1-row blank template
  new_row <- df[1, , drop = FALSE]
  new_row[,] <- NA

  # Fill the desired columns
  new_row[1, col_start:col_end] <- values

  # Insert the row
  dplyr::bind_rows(
    df[seq_len(at - 1), , drop = FALSE],
    new_row,
    df[at:nrow(df), , drop = FALSE]
  )
}

insert_row = function(df, values, at = 1) {
  # browser()
  assertthat::assert_that(ncol(df) == length(values))
  
  if (at == 1)  {
    idx_before = numeric()
    idx_after = 1:nrow(df)
  } else if (at == nrow(df)) {
    idx_before = 1:nrow(df)
    idx_after = numeric()
  } else {
    idx_before = 1:(at-1)
    idx_after = at:nrow(df)
  }
  
  bind_rows(
    df[idx_before, ],
    matrix(values, nrow = 1) %>% {colnames(.) = colnames(df); .} %>% as_tibble(),
    df[idx_after, ]
  )
}

insert_row(
  iris[1:5, -5],
  values = 1:4
)

Data

#NIQ and S
gs4_deauth()
d_niq_s = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit?gid=528842851#gid=528842851", sheet = 3)

## ✔ Reading from "National IQ datasets".

## ✔ Range ''Seb2024b''.

#ISO3
d_niq_s$ISO = pu_translate(d_niq_s$Name)

## No exact match: Hong Kong SAR China

## No exact match: Macao SAR China

## No exact match: Palestinian Territories

## No exact match: Congo - Brazzaville

## No exact match: Congo - Kinshasa

## No exact match: São Tomé & Príncipe

## Best fuzzy match found: Hong Kong SAR China -> Hong Kong SAR, China with distance 1.00

## Best fuzzy match found: Macao SAR China -> Macao SAR, China with distance 1.00

## Best fuzzy match found: Palestinian Territories -> Palestinian territories with distance 1.00

## Best fuzzy match found: Congo - Brazzaville -> Congo (Brazzaville) with distance 3.00

## Best fuzzy match found: Congo - Kinshasa -> Congo (Zaire) with distance 9.00

## Best fuzzy match found: São Tomé & Príncipe -> São Tomé and Príncipe with distance 3.00

#GDPpc
d_gdppc = read_csv("data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv") %>% df_legalize_names()

## Rows: 7311 Columns: 5

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Entity, Code, World regions according to OWID
## dbl (2): Year, GDP per capita, PPP (constant 2021 international $)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#rename
d_gdppc %<>% 
  rename(
    ISO = Code,
    GDPpcppp = GDP_per_capita_PPP_constant_2021_international
  )

#productivity
d_prod = read_csv("data/labor-productivity-per-hour-pennworldtable/labor-productivity-per-hour-pennworldtable.csv") %>% df_legalize_names()

## Rows: 4965 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Productivity: output per hour worked
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#fix
d_prod %<>% 
  rename(
    ISO = Code,
    Productivity = Productivity_output_per_hour_worked
  )

#population
d_pop = read_csv("data/population/population.csv") %>% df_legalize_names()

## Rows: 58824 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Population (historical)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#fix
d_pop %<>% rename(
  ISO = Code,
  Population = Population_historical
)

# Use geometries as sf
d_worldmap <- rnaturalearth::ne_countries(returnclass = "sf")

# 1) Reproject to a projected CRS (e.g. Web Mercator 3857)
d_worldmap_proj <- st_transform(d_worldmap, 3857)

# 2) Compute centroids in projected space
d_worldmap_cent <- d_worldmap_proj %>%
  st_centroid()

## Warning: st_centroid assumes attributes are constant over geometries

# 3) Transform centroids back to lon/lat (EPSG:4326)
d_worldmap_cent <- st_transform(d_worldmap_cent, 4326)

# 4) Extract lon/lat as vars
coords <- st_coordinates(d_worldmap_cent)

d_worldmap_cent <- d_worldmap_cent %>%
  mutate(
    lon = coords[, "X"],
    lat = coords[, "Y"]
  )

#ISO
d_worldmap_cent %<>% rename(
  ISO = adm0_a3
)

#join all data
d = smart_join(
  d_niq_s %>% select(-Name),
  d_gdppc %>% filter(Year == 2023, !is.na(ISO)) %>% select(ISO, GDPpcppp),
  id_col = "ISO"
) %>% smart_join(
  d_prod %>% filter(Year == 2023, !is.na(ISO)) %>% select(ISO, Productivity),
  id_col = "ISO"
) %>% smart_join(
  d_pop %>% filter(Year == 2023, !is.na(ISO)) %>% select(ISO, Population),
  id_col = "ISO"
) %>% smart_join(
  d_worldmap_cent %>% st_drop_geometry() %>% select(ISO, lat, lon),
  id_col = "ISO"
)

#drop non-countries
d %<>% filter(!str_detect(ISO, "OWID_"))

#mutate
d %<>% mutate(
  log_population = log10(Population),
  log_GDPpcppp = log10(GDPpcppp)
)

#spatial lag
d %<>% spatial_knn(
  vars = c("Productivity", "GDPpcppp", "log_GDPpcppp", "SDI")
)

#spatial cors
d %>% spatial_lag_cors()

##          SDI     GDPpcppp Productivity log_GDPpcppp 
##        0.867        0.769        0.792        0.804

Analysis

#cors
d %>% 
  select(-ISO, -ends_with("_lag"), -lat, -lon) %>% 
  GG_heatmap()

GG_save("figs/nat_cors.png")

#scatters
p_pop_gdp = d %>% 
  GG_scatter("Population", "GDPpcppp", case_names = "ISO") +
  labs(
    y = "GDP per person PPP",
    x = "Population count"
  )

p_pop_log_gdp = d %>% 
  GG_scatter("Population", "log_GDPpcppp", case_names = "ISO") +
  labs(
    y = "Log10 GDP per person PPP",
    x = "Population count"
  )

p_log_pop_gdp = d %>% 
  GG_scatter("log_population", "GDPpcppp", case_names = "ISO") +
  labs(
    y = "GDP per person PPP",
    x = "Log10 population count"
  )

p_log_pop_log_gdp = d %>% 
  GG_scatter("log_population", "log_GDPpcppp", case_names = "ISO") +
  labs(
    y = "Log10 GDP per person PPP",
    x = "Log10 population count"
  )

patchwork::wrap_plots(
  p_pop_gdp,
  p_pop_log_gdp,
  p_log_pop_gdp,
  p_log_pop_log_gdp
)

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/4 scatter pop gdp.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#models
#gdp
d_std = d %>% df_standardize()

## Skipped ISO because it is a character (string)

mods = list(
  lm(GDPpcppp ~ log_population, data = d_std),
  lm(GDPpcppp ~ GDPpcppp_lag + log_population, data = d_std),
  lm(GDPpcppp ~ GDPpcppp_lag + log_population + NIQ, data = d_std),
  
  lm(log_GDPpcppp ~ log_population, data = d_std),
  lm(log_GDPpcppp ~ log_GDPpcppp_lag + log_population, data = d_std),
  lm(log_GDPpcppp ~ log_GDPpcppp_lag + log_population + NIQ, data = d_std),
  
  lm(Productivity ~ log_population, data = d_std),
  lm(Productivity ~ Productivity_lag + log_population, data = d_std),
  lm(Productivity ~ Productivity_lag + log_population + NIQ, data = d_std),
  
  lm(SDI ~ log_population, data = d_std),
  lm(SDI ~ SDI_lag + log_population, data = d_std),
  lm(SDI ~ SDI_lag + log_population + NIQ, data = d_std)
)

mods %>% 
  summarize_models(asterisks_only = F) %>% 
  insert_row(
    values = c("outcome", rep("GDPpcppp", 3), rep("log10 GDPpcppp", 3), rep("productivity", 3), rep("S", 3)),
    at = 1
  ) %>% 
  flextable::flextable()

Predictor/Model	1	2	3	4	5	6	7	8	9	10	11	12
outcome	GDPpcppp	GDPpcppp	GDPpcppp	log10 GDPpcppp	log10 GDPpcppp	log10 GDPpcppp	productivity	productivity	productivity	S	S	S
(Intercept)	0.05 (0.074, 0.54)	0.03 (0.062, 0.636)	0.05 (0.055, 0.402)	0.05 (0.074, 0.496)	-0.02 (0.063, 0.738)	0.02 (0.052, 0.689)	0.33 (0.132, 0.014)	0.02 (0.094, 0.869)	-0.06 (0.084, 0.489)	0.03 (0.079, 0.701)	-0.05 (0.053, 0.381)	0.00 (0.038, 0.97)
log_population	-0.19 (0.088, 0.034)	-0.17 (0.081, 0.032)	-0.22 (0.072, 0.003**)	-0.21 (0.087, 0.019)	-0.08 (0.082, 0.349)	-0.17 (0.069, 0.012)	-0.52 (0.160, 0.001**)	-0.06 (0.114, 0.621)	-0.07 (0.101, 0.463)	-0.11 (0.098, 0.27)	0.03 (0.069, 0.667)	-0.07 (0.050, 0.167)
SDI_lag											0.89 (0.040, <0.001***)	0.34 (0.052, <0.001***)
NIQ			0.38 (0.057, <0.001***)			0.54 (0.062, <0.001***)			0.40 (0.068, <0.001***)			0.65 (0.052, <0.001***)
GDPpcppp_lag		0.71 (0.047, <0.001***)	0.46 (0.057, <0.001***)
log_GDPpcppp_lag					0.81 (0.048, <0.001***)	0.39 (0.062, <0.001***)
Productivity_lag								0.79 (0.059, <0.001***)	0.52 (0.070, <0.001***)
R2 adj.	0.018	0.598	0.688	0.023	0.643	0.761	0.070	0.622	0.704	0.001	0.748	0.872
N	196	161	160	196	161	160	129	124	124	193	166	166

#without china and india
d_std_sub1 = d %>% filter(!ISO %in% c("CHN", "IND")) %>% df_standardize()

## Skipped ISO because it is a character (string)

#refit
mods_sub1 = list(
  lm(GDPpcppp ~ log_population, data = d_std_sub1),
  lm(GDPpcppp ~ GDPpcppp_lag + log_population, data = d_std_sub1),
  lm(GDPpcppp ~ GDPpcppp_lag + log_population + NIQ, data = d_std_sub1),
  
  lm(log_GDPpcppp ~ log_population, data = d_std_sub1),
  lm(log_GDPpcppp ~ log_GDPpcppp_lag + log_population, data = d_std_sub1),
  lm(log_GDPpcppp ~ log_GDPpcppp_lag + log_population + NIQ, data = d_std_sub1),
  
  lm(Productivity ~ log_population, data = d_std_sub1),
  lm(Productivity ~ Productivity_lag + log_population, data = d_std_sub1),
  lm(Productivity ~ Productivity_lag + log_population + NIQ, data = d_std_sub1),
  
  lm(SDI ~ log_population, data = d_std_sub1),
  lm(SDI ~ SDI_lag + log_population, data = d_std_sub1),
  lm(SDI ~ SDI_lag + log_population + NIQ, data = d_std_sub1)
)

mods_sub1 %>% 
  summarize_models(asterisks_only = F) %>% 
  insert_row(
    values = c("outcome", rep("GDPpcppp", 3), rep("log10 GDPpcppp", 3), rep("productivity", 3), rep("S", 3)),
    at = 1
  ) %>% 
  flextable::flextable()

Predictor/Model	1	2	3	4	5	6	7	8	9	10	11	12
outcome	GDPpcppp	GDPpcppp	GDPpcppp	log10 GDPpcppp	log10 GDPpcppp	log10 GDPpcppp	productivity	productivity	productivity	S	S	S
(Intercept)	0.04 (0.074, 0.551)	0.04 (0.064, 0.507)	0.05 (0.057, 0.374)	0.05 (0.074, 0.48)	-0.01 (0.065, 0.91)	0.03 (0.054, 0.613)	0.33 (0.138, 0.018)	0.00 (0.099, 0.983)	-0.10 (0.088, 0.282)	0.03 (0.080, 0.681)	-0.03 (0.054, 0.536)	0.00 (0.039, 0.968)
log_population	-0.18 (0.089, 0.043)	-0.20 (0.084, 0.02)	-0.22 (0.074, 0.003**)	-0.21 (0.089, 0.017)	-0.10 (0.085, 0.223)	-0.18 (0.071, 0.011)	-0.52 (0.170, 0.003**)	-0.04 (0.121, 0.769)	-0.02 (0.106, 0.816)	-0.11 (0.100, 0.253)	0.00 (0.071, 0.958)	-0.07 (0.052, 0.189)
SDI_lag											0.89 (0.040, <0.001***)	0.33 (0.053, <0.001***)
NIQ			0.38 (0.058, <0.001***)			0.54 (0.063, <0.001***)			0.42 (0.069, <0.001***)			0.66 (0.053, <0.001***)
GDPpcppp_lag		0.71 (0.048, <0.001***)	0.46 (0.058, <0.001***)
log_GDPpcppp_lag					0.81 (0.048, <0.001***)	0.39 (0.063, <0.001***)
Productivity_lag								0.79 (0.060, <0.001***)	0.51 (0.071, <0.001***)
R2 adj.	0.016	0.600	0.688	0.024	0.646	0.762	0.062	0.619	0.707	0.002	0.752	0.872
N	194	159	158	194	159	158	127	122	122	191	164	164

#NIQ vs. S
d %>% 
  GG_scatter("NIQ", "SDI", case_names = "ISO") +
  labs(
    x = "IQ mean",
    y = "Socioeconomic development",
    title = "National intelligence means predict most international variation in development",
    subtitle = "Data source: Jensen & Kirkegaard 2024"
  )

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/scatter_NIQ_S.png")

## `geom_smooth()` using formula = 'y ~ x'

Meta

#versions
write_sessioninfo()

## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rnaturalearth_1.1.0   googlesheets4_1.1.2   sf_1.0-21            
##  [4] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
##  [7] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [10] lubridate_1.9.4       forcats_1.0.1         stringr_1.5.2        
## [13] dplyr_1.1.4           purrr_1.1.0           readr_2.1.5          
## [16] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.0        
## [19] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.17.1       jsonlite_2.0.0         
##   [4] shape_1.4.6.1           jomo_2.7-6              farver_2.1.2           
##   [7] nloptr_2.2.1            rmarkdown_2.30          fs_1.6.6               
##  [10] ragg_1.5.0              vctrs_0.6.5             minqa_1.2.8            
##  [13] askpass_1.2.1           base64enc_0.1-3         terra_1.8-70           
##  [16] htmltools_0.5.8.1       curl_7.0.0              broom_1.0.10           
##  [19] cellranger_1.1.0        Formula_1.2-5           mitml_0.4-5            
##  [22] sass_0.4.10             KernSmooth_2.23-26      bslib_0.9.0            
##  [25] htmlwidgets_1.6.4       plyr_1.8.9              cachem_1.1.0           
##  [28] uuid_1.2-1              lifecycle_1.0.4         iterators_1.0.14       
##  [31] pkgconfig_2.0.3         Matrix_1.7-4            R6_2.6.1               
##  [34] fastmap_1.2.0           rbibutils_2.3           digest_0.6.37          
##  [37] colorspace_2.1-2        patchwork_1.3.2         textshaping_1.0.4      
##  [40] Hmisc_5.2-4             labeling_0.4.3          timechange_0.3.0       
##  [43] gdata_3.0.1             httr_1.4.7              mgcv_1.9-3             
##  [46] compiler_4.5.2          gargle_1.6.0            proxy_0.4-27           
##  [49] bit64_4.6.0-1           fontquiver_0.2.1        withr_3.0.2            
##  [52] htmlTable_2.4.3         S7_0.2.0                backports_1.5.0        
##  [55] DBI_1.2.3               pan_1.9                 MASS_7.3-65            
##  [58] openssl_2.3.4           classInt_0.4-11         gtools_3.9.5           
##  [61] tools_4.5.2             units_1.0-0             foreign_0.8-90         
##  [64] googledrive_2.1.2       zip_2.3.3               nnet_7.3-20            
##  [67] glue_1.8.0              nlme_3.1-168            grid_4.5.2             
##  [70] stringdist_0.9.15       checkmate_2.3.3         cluster_2.1.8.1        
##  [73] generics_0.1.4          gtable_0.3.6            tzdb_0.5.0             
##  [76] class_7.3-23            data.table_1.17.8       hms_1.1.3              
##  [79] xml2_1.4.0              foreach_1.5.2           pillar_1.11.1          
##  [82] vroom_1.6.6             splines_4.5.2           lattice_0.22-7         
##  [85] survival_3.8-3          bit_4.6.0               tidyselect_1.2.1       
##  [88] fontLiberation_0.1.0    knitr_1.50              fontBitstreamVera_0.1.1
##  [91] reformulas_0.4.1        gridExtra_2.3           xfun_0.53              
##  [94] DEoptimR_1.1-4          stringi_1.8.7           yaml_2.3.10            
##  [97] boot_1.3-32             evaluate_1.0.5          codetools_0.2-20       
## [100] officer_0.7.0           gdtools_0.4.4           archive_1.1.12         
## [103] cli_3.6.5               rpart_4.1.24            systemfonts_1.3.1      
## [106] Rdpack_2.6.4            jquerylib_0.1.4         Rcpp_1.1.0             
## [109] readxl_1.4.5            parallel_4.5.2          lme4_1.1-37            
## [112] glmnet_4.1-10           scales_1.4.0            e1071_1.7-16           
## [115] crayon_1.5.3            flextable_0.9.10        rlang_1.1.6            
## [118] mnormt_2.1.1            mice_3.18.0

#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}

Does population size matter?

04 December, 2025

Init

Functions

Data

Analysis

Meta