Based on this paper, we can approximately estimate the mean IQ of immigrants by simulation: we generate data under a range of assumed immigrant means and look for the mean that best reproduces the proportions seen in the study.

Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
# No additional packages are requested here
load_packages()

# Use the black-and-white ggplot2 theme for every plot that follows
theme_set(theme_bw())

# Print numbers with 3 significant digits
options(digits = 3)

Functions

#' Simulate the share of non-Danes in each IQ band over a grid of assumed means
#'
#' For every candidate non-Danish mean, normally distributed IQ scores are
#' drawn for both groups (Danes: mean 100, SD 15; non-Danes: candidate mean,
#' SD 15). The pooled sample is split into three bands -- low (<= 85),
#' middle (85 < IQ <= 115) and high (> 115) -- and the proportion of non-Danes
#' within each band is recorded.
#'
#' @param danish_n Number of Danes to simulate per run (single number >= 0).
#' @param nondanish_n Number of non-Danes to simulate per run (single
#'   number >= 0).
#' @param step_size Spacing of the grid of candidate non-Danish means.
#' @param lower_limit First candidate non-Danish mean.
#' @param upper_limit Last candidate non-Danish mean.
#' @param replicates Number of times the whole grid is repeated.
#' @return A tibble with one row per run and the columns `nondanish_low`,
#'   `nondanish_middle`, `nondanish_high`, `danish_n`, `nondanish_n` and
#'   `nondanish_mean`. A band that received no simulated person at all has
#'   proportion `NaN`; the run is kept rather than dropped.
run_simulations = function(danish_n, nondanish_n, step_size = 0.1, lower_limit = 80, upper_limit = 90, replicates = 5) {
  # rnorm() treats a vector `n` as "draw length(n) values", so a vector of
  # per-band sizes passed by mistake would silently simulate 3 people.
  stopifnot(
    is.numeric(danish_n), length(danish_n) == 1, is.finite(danish_n), danish_n >= 0,
    is.numeric(nondanish_n), length(nondanish_n) == 1, is.finite(nondanish_n), nondanish_n >= 0
  )

  #generate range of nondanish means
  nondanish_means = rep(seq(lower_limit, upper_limit, by = step_size), replicates)

  # Band boundaries; left.open = TRUE gives right-closed intervals, matching
  # cut(IQ, breaks = c(-Inf, 85, 115, Inf)): (-Inf, 85], (85, 115], (115, Inf)
  band_cuts = c(85, 115)
  count_bands = function(iq) {
    tabulate(findInterval(iq, band_cuts, left.open = TRUE) + 1L, nbins = 3L)
  }

  #loop across
  runs = map(nondanish_means, function(nondanish_mean) {
    # Danes are drawn before non-Danes so the random number stream is consumed
    # in the same order as before and seeded runs stay reproducible.
    danish_counts = count_bands(rnorm(danish_n, 100, 15))
    nondanish_counts = count_bands(rnorm(nondanish_n, nondanish_mean, 15))

    #proportions of nondanes in bands (0 / 0 = NaN for an empty band)
    prop_nondanish = nondanish_counts / (danish_counts + nondanish_counts)

    tibble(
      nondanish_low = prop_nondanish[1],
      nondanish_middle = prop_nondanish[2],
      nondanish_high = prop_nondanish[3],
      danish_n = danish_n,
      nondanish_n = nondanish_n,
      nondanish_mean = nondanish_mean
    )
  })

  #return (visibly)
  bind_rows(runs)
}

Analysis

# Observed head counts per IQ band, ordered low, middle, high
danish_sizes = c(30892, 173649, 39891)
nondanish_sizes = c(4860, 8059, 440)

# Observed share of non-Danes within each band
true_nondanish_props = nondanish_sizes / (danish_sizes + nondanish_sizes)

# Simulate with the pooled group sizes; the seed makes the draws reproducible
set.seed(1)
results = run_simulations(
  danish_n = sum(danish_sizes),
  nondanish_n = sum(nondanish_sizes)
)

# Misfit of each run: summed absolute gap between simulated and observed shares
results = results %>%
  mutate(
    deviation = abs(nondanish_low - true_nondanish_props[1]) +
      abs(nondanish_middle - true_nondanish_props[2]) +
      abs(nondanish_high - true_nondanish_props[3])
  )

# Misfit against the assumed non-Danish mean. The loess span of 0.2 is a
# narrow window, so the curve tracks the points closely (less smoothing, not
# more) and the location of the minimum stays visible.
ggplot(results, aes(x = nondanish_mean, y = deviation)) +
  geom_point() +
  geom_smooth(span = 0.2) +
  scale_x_continuous(breaks = seq(0, 100, 1)) +
  labs(
    x = "Assumed IQ mean of non-Danes, relative to Danish 100",
    y = "Sum of absolute deviations from true proportions"
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Meta

#versions
# Print the R version, platform, locale and attached/loaded package versions
# so the run can be reproduced.
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kirkegaard_2024-06-07 psych_2.4.3           assertthat_0.2.1     
##  [4] weights_1.0.4         Hmisc_5.1-3           magrittr_2.0.3       
##  [7] lubridate_1.9.3       forcats_1.0.0         stringr_1.5.1        
## [10] dplyr_1.1.4           purrr_1.0.2           readr_2.1.5          
## [13] tidyr_1.3.1           tibble_3.2.1          ggplot2_3.5.1        
## [16] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  farver_2.1.2      fastmap_1.2.0     digest_0.6.35    
##  [5] rpart_4.1.23      timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.6    
##  [9] survival_3.5-8    gdata_3.0.0       compiler_4.4.0    rlang_1.1.3      
## [13] sass_0.4.9        tools_4.4.0       utf8_1.2.4        yaml_2.3.8       
## [17] data.table_1.15.4 knitr_1.47        labeling_0.4.3    htmlwidgets_1.6.4
## [21] mnormt_2.1.1      withr_3.0.0       foreign_0.8-86    nnet_7.3-19      
## [25] grid_4.4.0        fansi_1.0.6       jomo_2.7-6        colorspace_2.1-0 
## [29] mice_3.16.0       scales_1.3.0      gtools_3.9.5      iterators_1.0.14 
## [33] MASS_7.3-60       cli_3.6.2         rmarkdown_2.27    generics_0.1.3   
## [37] rstudioapi_0.16.0 tzdb_0.4.0        minqa_1.2.7       cachem_1.1.0     
## [41] splines_4.4.0     parallel_4.4.0    base64enc_0.1-3   vctrs_0.6.5      
## [45] boot_1.3-30       glmnet_4.1-8      Matrix_1.6-5      jsonlite_1.8.8   
## [49] hms_1.1.3         mitml_0.4-5       Formula_1.2-5     htmlTable_2.4.2  
## [53] foreach_1.5.2     jquerylib_0.1.4   glue_1.7.0        nloptr_2.0.3     
## [57] pan_1.9           codetools_0.2-19  stringi_1.8.4     gtable_0.3.5     
## [61] shape_1.4.6.1     lme4_1.1-35.3     munsell_0.5.1     pillar_1.9.0     
## [65] htmltools_0.5.8.1 R6_2.5.1          evaluate_0.23     lattice_0.22-5   
## [69] highr_0.11        backports_1.5.0   broom_1.0.6       bslib_0.7.0      
## [73] Rcpp_1.0.12       gridExtra_2.3     nlme_3.1-163      checkmate_2.3.1  
## [77] mgcv_1.9-1        xfun_0.44         pkgconfig_2.0.3