Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readxl,
  mirt,
  googlesheets4,
  patchwork
)
## Loading required package: stats4
## Loading required package: lattice

Functions

extract_item_data = function(x) {
  if (!is.list(x)) {
    x = list(x)
  }
  
  #default model names if none were given
  if (!has_names(x)) {
    names(x) = str_c("model ", seq_along(x))
  }
  
  #loop and get stats
  stats = map2_dfr(x, names(x), function(xx, name) {
    xx@Fit$`F` %>% as.data.frame() %>% rownames_to_column() %>% mutate(model = name)
  })
  
  #restructure columns
  stats2 = pivot_longer(stats, cols = starts_with("F"), names_to = "factor", values_to = "loading")
  
  stats2 %>% rename(item = rowname)
}

Data

wgm2018 = read_excel("data/wgm2018-dataset-crosstabs-all-countries.xlsx", sheet = 2)

#country codes
country_codes = "1=United States, 2=Egypt, 3=Morocco, 4=Lebanon, 5=Saudi Arabia, 6=Jordan, 8=Turkey, 9=Pakistan, 10=Indonesia, 11=Bangladesh, 12=United Kingdom, 13=France, 14=Germany, 15=Netherlands, 16=Belgium, 17=Spain, 18=Italy, 19=Poland, 20=Hungary, 21=Czech Republic, 22=Romania, 23=Sweden, 24=Greece, 25=Denmark, 26=Iran, 28=Singapore, 29=Japan, 30=China, 31=India, 32=Venezuela, 33=Brazil, 34=Mexico, 35=Nigeria, 36=Kenya, 37=Tanzania, 38=Israel, 39=Palestinian Territories, 40=Ghana, 41=Uganda, 42=Benin, 43=Madagascar, 44=Malawi, 45=South Africa, 46=Canada, 47=Australia, 48=Philippines, 49=Sri Lanka, 50=Vietnam, 51=Thailand, 52=Cambodia, 53=Laos, 54=Myanmar, 55=New Zealand, 57=Botswana, 60=Ethiopia, 61=Mali, 62=Mauritania, 63=Mozambique, 64=Niger, 65=Rwanda, 66=Senegal, 67=Zambia, 68=South Korea, 69=Taiwan, 70=Afghanistan, 71=Belarus, 72=Georgia, 73=Kazakhstan, 74=Kyrgyzstan, 75=Moldova, 76=Russia, 77=Ukraine, 78=Burkina Faso, 79=Cameroon, 80=Sierra Leone, 81=Zimbabwe, 82=Costa Rica, 83=Albania, 84=Algeria, 87=Argentina, 88=Armenia, 89=Austria, 90=Azerbaijan, 96=Bolivia, 97=Bosnia and Herzegovina, 99=Bulgaria, 100=Burundi, 103=Chad, 104=Chile, 105=Colombia, 106=Comoros, 108=Republic of Congo, 109=Croatia, 111=Cyprus, 114=Dominican Republic, 115=Ecuador, 116=El Salvador, 119=Estonia, 121=Finland, 122=Gabon, 124=Guatemala, 125=Guinea, 128=Haiti, 129=Honduras, 130=Iceland, 131=Iraq, 132=Ireland, 134=Ivory Coast, 137=Kuwait, 138=Latvia, 140=Liberia, 141=Libya, 143=Lithuania, 144=Luxembourg, 145=Macedonia, 146=Malaysia, 148=Malta, 150=Mauritius, 153=Mongolia, 154=Montenegro, 155=Namibia, 157=Nepal, 158=Nicaragua, 160=Norway, 163=Panama, 164=Paraguay, 165=Peru, 166=Portugal, 173=Serbia, 175=Slovakia, 176=Slovenia, 183=Eswatini, 184=Switzerland, 185=Tajikistan, 186=The Gambia, 187=Togo, 190=Tunisia, 191=Turkmenistan, 193=United Arab Emirates, 194=Uruguay, 195=Uzbekistan, 197=Yemen, 198=Kosovo, 202=Northern Cyprus" %>% 
  str_split(pattern = ", ") %>% 
  .[[1]] %>% 
  str_match("(.+?)=(.+)")

wgm2018$country = mapvalues(wgm2018$WP5, from = country_codes[, 2], to = country_codes[, 3])
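
A quick way to confirm that every WP5 code was translated is to look for country values that are still bare numbers (a small check, not in the original output; it assumes mapvalues() leaves unmatched codes unchanged):

#any remaining bare numeric codes were not matched to a country name
wgm2018 %>% 
  filter(str_detect(country, "^\\d+$")) %>% 
  count(WP5, country)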

#sample sizes
wgm2018$country %>% table2()
#NIQ collection
gs4_deauth()
NIQ = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit#gid=0") %>% 
  df_legalize_names()
## ✔ Reading from "National IQ datasets".
## ✔ Range 'Sheet1'.

Analysis

sci_items = wgm2018 %>% 
  select(Q2, Q3, Q4, Q23) %>% 
  map_df(~mapvalues(., from = c(98, 99), to = c(NA, NA)))

#reverse one item
sci_items$Q4 = 1 - sci_items$Q4

#rename
sci_items %<>% set_colnames(c("understand_science", "diseases", "poetry", "vaccine"))
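
Before correlating the items, it is worth checking how much data the don't know/refused recode turns into missing values (a quick sketch, not part of the original output):

#proportion of missing responses per item after recoding 98/99 to NA
sci_items %>% map_dbl(~mean(is.na(.)))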

#cors
sci_items %>% 
  mixedCor()
## Call: mixedCor(data = .)
##                    undr_ disss potry vaccn
## understand_science  1.00                  
## diseases            0.33  1.00            
## poetry              0.14 -0.23  1.00      
## vaccine             0.29  0.28  0.06  1.00
#fit IRT models
sci4 = mirt(
  sci_items,
  model = 1,
  itemtype = c("graded", "2PL", "2PL", "2PL"),
  technical = list(NCYCLES = 10e3),
  verbose = F
)
## Warning: data contains response patterns with only NAs
sci4 %>% summary()
##                       F1     h2
## understand_science 0.816 0.6663
## diseases           0.469 0.2204
## poetry             0.137 0.0187
## vaccine            0.419 0.1755
## 
## SS loadings:  1.081 
## Proportion Var:  0.27 
## 
## Factor correlations: 
## 
##    F1
## F1  1
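
Beyond the standardized loadings, the item parameters on the conventional IRT metric (discriminations and difficulties) can be inspected with coef(); a brief sketch, output not shown here:

#discrimination/difficulty parameters for the 4-item model
coef(sci4, simplify = TRUE, IRTpars = TRUE)$items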
#no poetry
sci3 = mirt(
  sci_items[, -3],
  model = 1,
  itemtype = c("graded", "2PL", "2PL"),
  technical = list(NCYCLES = 10e3),
  verbose = F
)
## Warning: data contains response patterns with only NAs
sci3 %>% summary()
##                       F1    h2
## understand_science 0.609 0.371
## diseases           0.609 0.371
## vaccine            0.526 0.277
## 
## SS loadings:  1.019 
## Proportion Var:  0.34 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#extract item loadings for both models
extract_item_data(
  list(sci4, sci3)
)
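
The loadings extracted above can also be plotted for a quick visual comparison (a minimal sketch, assuming the item/factor/loading/model columns returned by extract_item_data()):

#dodged bar chart of loadings by item and model
extract_item_data(list(`4 items` = sci4, `3 items` = sci3)) %>% 
  ggplot(aes(x = item, y = loading, fill = model)) +
  geom_col(position = "dodge") +
  coord_flip()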
#scores
sci4_scores = fscores(sci4, full.scores = T, full.scores.SE = T)
sci3_scores = fscores(sci3, full.scores = T, full.scores.SE = T)

#reliability
sci4_scores %>% empirical_rxx()
##        F1 
## 0.5691699
sci3_scores %>% empirical_rxx()
##        F1 
## 0.3818689
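
As a sanity check, the empirical reliability can be recomputed by hand (a sketch assuming the usual EAP-based definition, rxx = var(theta) / (var(theta) + mean(SE^2)), and the F1/SE_F1 columns returned by fscores()):

#manual empirical reliability from the EAP scores and their standard errors
manual_rxx = function(scores) {
  theta = scores[, "F1"]
  se = scores[, "SE_F1"]
  ok = !is.na(theta) & !is.na(se)
  var(theta[ok]) / (var(theta[ok]) + mean(se[ok]^2))
}
manual_rxx(sci4_scores)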
#flip sign so that higher scores = more science knowledge, add back to main data
wgm2018$sci4 = sci4_scores[, 1] * -1
wgm2018$sci3 = sci3_scores[, 1] * -1

#alternative 3 item model
sci3b = mirt(
  sci_items[, -1],
  model = 1,
  itemtype = c("2PL", "2PL", "2PL"),
  technical = list(NCYCLES = 10e3),
  verbose = F
)
## Warning: data contains response patterns with only NAs
sci3b %>% summary()
##              F1     h2
## diseases  0.970 0.9406
## poetry   -0.207 0.0427
## vaccine   0.291 0.0847
## 
## SS loadings:  1.068 
## Proportion Var:  0.356 
## 
## Factor correlations: 
## 
##    F1
## F1  1
sci3b_scores = fscores(sci3b, full.scores = T, full.scores.SE = T)

#reliability
sci3b_scores %>% empirical_rxx()
##        F1 
## 0.2875882
#flip sign and add back to main data
wgm2018$sci3b = sci3b_scores[, 1] * -1

#score correlation
GG_scatter(wgm2018, "sci4", "sci3")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/sci4 vs. sci3.png")
## `geom_smooth()` using formula = 'y ~ x'
#IQs based on British norms
wgm2018$sci4_IQ = wgm2018$sci4 %>% standardize(focal_group = (wgm2018$country == "United Kingdom")) %>% 
  multiply_by(15) %>% 
  add(100)
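
For transparency, the focal-group scaling above should be equivalent to z-scoring against the UK mean and SD by hand (a sketch, assuming that is what standardize(focal_group = ...) does):

#manual equivalent: z-score against UK, then put on the IQ metric
uk = wgm2018$country == "United Kingdom"
sci4_IQ_manual = (wgm2018$sci4 - mean(wgm2018$sci4[uk], na.rm = T)) / 
  sd(wgm2018$sci4[uk], na.rm = T) * 15 + 100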

#by country
countries = wgm2018 %>% 
  group_by(country) %>% 
  summarise(
    sci4 = mean(sci4, na.rm = T),
    sci3 = mean(sci3, na.rm = T),
    sci3b = mean(sci3b, na.rm = T),
    sci4_IQ = mean(sci4_IQ, na.rm = T),
    understand_science = mean(Q2 == 1, na.rm = T),
    diseases = mean(Q3 == 1, na.rm = T),
    poetry = mean(Q4 == 1, na.rm = T), #proportion saying poetry is part of science (the reversed item)
    vaccine = mean(Q23 == 1, na.rm = T)
  )

#fix congo name
countries$country = case_when(
  (countries$country == "Republic of Congo") ~ "Congo-Brazzaville",
  .default = countries$country
)
countries$ISO = pu_translate(countries$country)
## No exact match: Palestinian Territories
## No exact match: Congo-Brazzaville
## Best fuzzy match found: Palestinian Territories -> Palestinian territories with distance 1.00
## Best fuzzy match found: Congo-Brazzaville -> Congo (Brazzaville) with distance 3.00
d = full_join(
  countries,
  NIQ,
  by = "ISO"
)
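
A quick check on the ISO merge (a sketch, not in the original output): which WGM countries failed to find an NIQ row, and how many NIQ rows have no WGM data.

#WGM countries without a matching NIQ row
anti_join(countries, NIQ, by = "ISO") %>% pull(country)
#NIQ rows without WGM data
anti_join(NIQ, countries, by = "ISO") %>% nrow()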

d %>% 
  GG_scatter("sci4", "sci3", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci4 vs. sci3.png")
## `geom_smooth()` using formula = 'y ~ x'
d %>% 
  GG_scatter("IQ_6datasets", "sci4", case_names = "country") +
  ylab("Science knowledge (4 items, WGM 2018)") +
  xlab("Average IQ based on 6 compilations")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci4 vs. NIQ6.png")
## `geom_smooth()` using formula = 'y ~ x'
d %>% 
  GG_scatter("IQ_6datasets", "sci3", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

d %>% select(
  sci3, sci4, Lynn2012, Rindermann2018, Becker2019, Lim2018, Woess2022, Altinok2023, IQ_6datasets
) %>% wtd.cors()
##                     sci3      sci4  Lynn2012 Rindermann2018 Becker2019
## sci3           1.0000000 0.9911333 0.6308641      0.6717284  0.6156519
## sci4           0.9911333 1.0000000 0.6505984      0.6937925  0.6384040
## Lynn2012       0.6308641 0.6505984 1.0000000      0.9784505  0.8738381
## Rindermann2018 0.6717284 0.6937925 0.9784505      1.0000000  0.8787456
## Becker2019     0.6156519 0.6384040 0.8738381      0.8787456  1.0000000
## Lim2018        0.6528476 0.6612966 0.8251889      0.8448315  0.8143591
## Woess2022      0.7021948 0.7237265 0.9111668      0.9211480  0.8816955
## Altinok2023    0.6741595 0.6944457 0.8907307      0.9168105  0.8569848
## IQ_6datasets   0.6855297 0.7061816 0.9609571      0.9709506  0.9353064
##                  Lim2018 Woess2022 Altinok2023 IQ_6datasets
## sci3           0.6528476 0.7021948   0.6741595    0.6855297
## sci4           0.6612966 0.7237265   0.6944457    0.7061816
## Lynn2012       0.8251889 0.9111668   0.8907307    0.9609571
## Rindermann2018 0.8448315 0.9211480   0.9168105    0.9709506
## Becker2019     0.8143591 0.8816955   0.8569848    0.9353064
## Lim2018        1.0000000 0.8683098   0.9317750    0.9198851
## Woess2022      0.8683098 1.0000000   0.9241958    0.9593419
## Altinok2023    0.9317750 0.9241958   1.0000000    0.9632570
## IQ_6datasets   0.9198851 0.9593419   0.9632570    1.0000000
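
Coverage differs across the IQ compilations, so the pairwise sample sizes behind the correlation matrix above are worth checking (a sketch using psych::corr.test, which defaults to Pearson correlations with pairwise deletion):

#pairwise n for the correlations above
sci_iq_ct = d %>% select(
  sci3, sci4, Lynn2012, Rindermann2018, Becker2019, Lim2018, Woess2022, Altinok2023, IQ_6datasets
) %>% corr.test()
sci_iq_ct$n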
d %>% 
  GG_scatter("IQ_6datasets", "sci4_IQ", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

#by item
p1 = d %>% 
  GG_scatter("IQ_6datasets", "understand_science", case_names = "country") +
  xlab("Average IQ based on 6 compilations")

p2 = d %>% 
  GG_scatter("IQ_6datasets", "diseases", case_names = "country") +
  xlab("Average IQ based on 6 compilations")

p3 = d %>% 
  GG_scatter("IQ_6datasets", "poetry", case_names = "country") +
  xlab("Average IQ based on 6 compilations")

p4 = d %>% 
  GG_scatter("IQ_6datasets", "vaccine", case_names = "country") +
  xlab("Average IQ based on 6 compilations")

(p1 + p2) / 
  (p3 + p4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/NIQ vs. items.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
d %>% 
  GG_scatter("IQ_6datasets", "sci3b", case_names = "country") +
  ylab("Science knowledge (3 items [diseases, poetry, vaccine], WGM 2018)") +
  xlab("Average IQ based on 6 compilations")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci3b vs. NIQ6.png")
## `geom_smooth()` using formula = 'y ~ x'

Meta

write_sessioninfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] patchwork_1.1.2       googlesheets4_1.1.1   mirt_1.39            
##  [4] lattice_0.22-5        readxl_1.4.2          kirkegaard_2023-04-30
##  [7] psych_2.3.3           assertthat_0.2.1      weights_1.0.4        
## [10] Hmisc_5.1-0           magrittr_2.0.3        lubridate_1.9.2      
## [13] forcats_1.0.0         stringr_1.5.0         dplyr_1.1.2          
## [16] purrr_1.0.1           readr_2.1.4           tidyr_1.3.0          
## [19] tibble_3.2.1          ggplot2_3.4.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1         pbapply_1.7-0        gridExtra_2.3       
##  [4] permute_0.9-7        rlang_1.1.1          compiler_4.3.2      
##  [7] mgcv_1.9-1           gdata_2.19.0         systemfonts_1.0.4   
## [10] vctrs_0.6.3          pkgconfig_2.0.3      shape_1.4.6         
## [13] fastmap_1.1.1        backports_1.4.1      labeling_0.4.2      
## [16] utf8_1.2.3           rmarkdown_2.22       tzdb_0.4.0          
## [19] nloptr_2.0.3         ragg_1.2.5           xfun_0.39           
## [22] glmnet_4.1-7         jomo_2.7-6           cachem_1.0.8        
## [25] jsonlite_1.8.5       highr_0.10           pan_1.6             
## [28] Deriv_4.1.3          broom_1.0.5          parallel_4.3.2      
## [31] cluster_2.1.6        R6_2.5.1             bslib_0.5.0         
## [34] stringi_1.7.12       boot_1.3-28          rpart_4.1.23        
## [37] jquerylib_0.1.4      cellranger_1.1.0     Rcpp_1.0.10         
## [40] iterators_1.0.14     knitr_1.43           base64enc_0.1-3     
## [43] Matrix_1.6-3         splines_4.3.2        nnet_7.3-19         
## [46] timechange_0.2.0     tidyselect_1.2.0     stringdist_0.9.12   
## [49] rstudioapi_0.14      yaml_2.3.7           vegan_2.6-4         
## [52] codetools_0.2-19     dcurver_0.9.2        curl_5.2.0          
## [55] withr_2.5.0          evaluate_0.21        foreign_0.8-86      
## [58] survival_3.5-7       pillar_1.9.0         mice_3.16.0         
## [61] checkmate_2.2.0      foreach_1.5.2        generics_0.1.3      
## [64] hms_1.1.3            munsell_0.5.0        scales_1.2.1        
## [67] minqa_1.2.5          gtools_3.9.4         glue_1.6.2          
## [70] tools_4.3.2          data.table_1.14.8    lme4_1.1-33         
## [73] fs_1.6.2             grid_4.3.2           colorspace_2.1-0    
## [76] nlme_3.1-163         htmlTable_2.4.1      googledrive_2.1.1   
## [79] Formula_1.2-5        cli_3.6.1            textshaping_0.3.6   
## [82] fansi_1.0.4          gargle_1.5.1         gtable_0.3.3        
## [85] sass_0.4.6           digest_0.6.31        GPArotation_2023.3-1
## [88] farver_2.1.1         htmlwidgets_1.6.2    htmltools_0.5.5     
## [91] lifecycle_1.0.3      httr_1.4.6           mitml_0.4-5         
## [94] MASS_7.3-60