Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## Loading required package: lattice
## 
## Loading required package: survival
## 
## Loading required package: Formula
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  googlesheets4,
  tmap,
  giscoR,
  readxl
)

# print numbers with 3 significant digits
options(digits = 3)

# use the black-and-white theme for all ggplot2 figures
theme_set(theme_bw())

Data

# Map data: country polygons from GISCO, resolution code "10",
# in WGS84 (EPSG:4326)
world <- gisco_get_countries(epsg = 4326, resolution = "10")

# Woessmann (2022) national basic skills data
# (disabled alternative: read the same table from Google Sheets)
# gs4_auth("the.dfx@gmail.com")
# woess = read_sheet("https://docs.google.com/spreadsheets/d/15SymdhLnLX1UP10UbhjrT3ZN7ZPoaySiqwXAigUW3Mw/edit#gid=0")
# ISO is derived from the country name before the column names are legalized
woess <- "data/National basic skills (Woessman 2022).xlsx" %>%
  read_excel() %>%
  mutate(ISO = pu_translate(Country)) %>%
  df_legalize_names()

# Becker NIQ dataset v1.3.3: second sheet, cells A2:N205
# here the column names are legalized first, then ISO is derived from Country
becker <- "data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx" %>%
  read_excel(sheet = 2, range = "A2:N205") %>%
  df_legalize_names() %>%
  mutate(ISO = pu_translate(Country))
## No exact match: Central African Rep.
## No exact match: Korea, North
## No exact match: Saint Helena, Ascension, and Tristan da Cunha
## No exact match: Virgin Islands
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00
## Warning: There were multiple equally good matches for Virgin Islands: Cayman
## Islands | Faroe Islands | Mariana Islands | Pitcairn Islands | Jarvis Island |
## Midway Islands | U.S. Virgin Islands. All with distance 5.00
# smart fraction dataset; supplies the UN macroregion classification
# that is joined onto the merged data by ISO
smartfraction <- readr::read_rds("data/smart_fraction_data_out.rds")

Merge

# keep every country found in either source, then attach the UN macroregion
d <- woess %>%
  full_join(becker, by = "ISO") %>%
  left_join(select(smartfraction, ISO, UN_macroregion), by = "ISO")

# stop if any ISO value occurs more than once after merging
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
# shorter name for the Lynn & Vanhanen 2012 (+GEO) column
d <- rename(d, Lynn2012 = L_and_V12plusGEO)

Analysis

# Inverse normal transform of the enrollment-corrected estimates:
# the share of students below basic skills is converted to the upper-tail
# normal quantile, so a smaller share below basic gives a higher z-score.
# NOTE(review): assumes the column holds proportions in (0, 1); percentages
# would give NaN and shares of exactly 0 or 1 give +/-Inf -- confirm.
d <- d %>% mutate(
  basic_skills_z = qnorm(
    Estimated_below_basic_of_all_students,
    lower.tail = FALSE
  )
)

# Rescale the basic skills z-scores to the IQ scale.
# Lynn2012 is the reference: within the set of countries that have both
# measures, basic skills are given the same mean and SD as Lynn2012.
Lynn_bs_overlap <- d %>% select(Lynn2012, basic_skills_z, ISO) %>% na.omit()

# Fail loudly instead of silently producing an all-NA/NaN IQ column:
# an SD needs at least two countries, and an infinite z-score would
# make the overlap mean and SD meaningless.
stopifnot(
  nrow(Lynn_bs_overlap) >= 2,
  all(is.finite(Lynn_bs_overlap$basic_skills_z))
)

(Lynn_bs_overlap_sumstats <- Lynn_bs_overlap %>% describe2())

# Rows are indexed by position: row 1 is taken as Lynn2012 and row 2 as
# basic_skills_z, i.e. this relies on describe2() returning one row per
# column in the order of the select() above.
# Standardize on the overlap set, then apply Lynn2012's SD and mean.
d$basic_skill_mean_IQ <- Lynn_bs_overlap_sumstats$mean[1] +
  (d$basic_skills_z - Lynn_bs_overlap_sumstats$mean[2]) /
    Lynn_bs_overlap_sumstats$sd[2] * Lynn_bs_overlap_sumstats$sd[1]

# mean of both IQ measures per UN macroregion, one column per region
d %>%
  select(Lynn2012, basic_skill_mean_IQ) %>%
  describe2(d$UN_macroregion) %>%
  select(group, var, mean) %>%
  pivot_wider(names_from = group, values_from = mean)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

Maps

# attach the country data to the map polygons, keyed on the ISO3 code
world$ISO <- world$ISO3_CODE
d_map <- left_join(world, d, by = "ISO")
# whole-number values for the text labels
d_map$basic_skill_mean_IQ_rounded <- round(d_map$basic_skill_mean_IQ)

# fill each country by its basic-skills IQ and print the rounded value on it
map <- tm_shape(d_map) +
  tm_fill("basic_skill_mean_IQ", title = "Basic skills, in IQ's") +
  tm_text("basic_skill_mean_IQ_rounded", size = 0.5)

# show the map
map

# save the map as PNG
tmap_save(map, "figs/basic_skills_map.png")
## Map saved to /media/emil/8tb_ssd_3/projects/basic_skills_2022/figs/basic_skills_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10.2 by 4.81 inches (300 dpi)

Meta

# country listing with both IQ estimates
select(d, Country.x, ISO, basic_skill_mean_IQ, Lynn2012)
# record the session info
write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.1          giscoR_0.3.2          tmap_3.3-3           
##  [4] googlesheets4_1.0.1   kirkegaard_2022-08-31 psych_2.2.9          
##  [7] assertthat_0.2.1      weights_1.0.4         Hmisc_4.7-1          
## [10] Formula_1.2-4         survival_3.2-13       lattice_0.20-45      
## [13] magrittr_2.0.3        forcats_0.5.2         stringr_1.4.1        
## [16] dplyr_1.0.10          purrr_0.3.5           readr_2.1.3          
## [19] tidyr_1.2.1           tibble_3.1.8          ggplot2_3.3.6        
## [22] tidyverse_1.3.2      
## 
## loaded via a namespace (and not attached):
##   [1] leafem_0.2.0        googledrive_2.0.0   minqa_1.2.4        
##   [4] colorspace_2.0-3    deldir_1.0-6        ellipsis_0.3.2     
##   [7] class_7.3-20        leaflet_2.1.1       htmlTable_2.4.1    
##  [10] base64enc_0.1-3     fs_1.5.2            dichromat_2.0-0.1  
##  [13] rstudioapi_0.14     proxy_0.4-27        mice_3.14.0        
##  [16] fansi_1.0.3         lubridate_1.8.0     xml2_1.3.3         
##  [19] codetools_0.2-18    splines_4.1.2       mnormt_2.1.1       
##  [22] cachem_1.0.6        knitr_1.40          jsonlite_1.8.2     
##  [25] nloptr_2.0.3        tmaptools_3.1-1     rematch_1.0.1      
##  [28] broom_1.0.1         cluster_2.1.2       dbplyr_2.2.1       
##  [31] png_0.1-7           compiler_4.1.2      httr_1.4.4         
##  [34] backports_1.4.1     Matrix_1.4-0        fastmap_1.1.0      
##  [37] gargle_1.2.1        cli_3.4.1           s2_1.1.0           
##  [40] htmltools_0.5.3     tools_4.1.2         gtable_0.3.1       
##  [43] glue_1.6.2          wk_0.7.0            Rcpp_1.0.9         
##  [46] raster_3.6-3        cellranger_1.1.0    jquerylib_0.1.4    
##  [49] vctrs_0.4.2         gdata_2.18.0.1      nlme_3.1-155       
##  [52] leafsync_0.1.0      crosstalk_1.2.0     lwgeom_0.2-9       
##  [55] xfun_0.34           lme4_1.1-30         rvest_1.0.3        
##  [58] lifecycle_1.0.3     gtools_3.9.3        stringdist_0.9.8   
##  [61] XML_3.99-0.11       terra_1.6-17        MASS_7.3-55        
##  [64] scales_1.2.1        hms_1.1.2           parallel_4.1.2     
##  [67] RColorBrewer_1.1-3  yaml_2.3.6          gridExtra_2.3      
##  [70] sass_0.4.2          rpart_4.1.16        latticeExtra_0.6-30
##  [73] stringi_1.7.8       highr_0.9           e1071_1.7-11       
##  [76] checkmate_2.1.0     boot_1.3-28         rlang_1.0.6        
##  [79] pkgconfig_2.0.3     evaluate_0.17       sf_1.0-8           
##  [82] htmlwidgets_1.5.4   tidyselect_1.2.0    plyr_1.8.7         
##  [85] geojsonsf_2.0.3     R6_2.5.1            generics_0.1.3     
##  [88] DBI_1.1.3           pillar_1.8.1        haven_2.5.1        
##  [91] foreign_0.8-82      withr_2.5.0         stars_0.5-6        
##  [94] units_0.8-0         sp_1.5-0            abind_1.4-5        
##  [97] nnet_7.3-17         modelr_0.1.9        crayon_1.5.2       
## [100] interp_1.1-3        KernSmooth_2.23-20  utf8_1.2.2         
## [103] tzdb_0.3.0          rmarkdown_2.17      jpeg_0.1-9         
## [106] grid_4.1.2          data.table_1.14.4   reprex_2.0.2       
## [109] digest_0.6.30       classInt_0.4-8      munsell_0.5.0      
## [112] viridisLite_0.4.1   bslib_0.4.0
# output data table
# NOTE(review): this only selects and prints the columns; nothing is written
# to disk here -- add an explicit write step if a file is wanted
select(d, ISO, Country.x, UN_macroregion, basic_skill_mean_IQ, Lynn2012)