Init
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
mirt,
googlesheets4,
patchwork
)
## Loading required package: stats4
## Loading required package: lattice
Functions
extract_item_data = function(x) {
if (!is.list(x)) {
x = list(x)
}
if (!has_names(x)) {
names(x) = seq_along("model " + seq_along(x))
}
#loop and get stats
stats = map2_dfr(x, names(x), function(xx, name) {
xx@Fit$`F` %>% as.data.frame() %>% rownames_to_column() %>% mutate(model = name)
})
#restructure columns
stats2 = pivot_longer(stats, cols = starts_with("F"), names_to = "factor", values_to = "loading")
stats2 %>% rename(item = rowname)
}
Data
wgm2018 = read_excel("data/wgm2018-dataset-crosstabs-all-countries.xlsx", sheet = 2)
#country codes
country_codes = "1=United States, 2=Egypt, 3=Morocco, 4=Lebanon, 5=Saudi Arabia, 6=Jordan, 8=Turkey, 9=Pakistan, 10=Indonesia, 11=Bangladesh, 12=United Kingdom, 13=France, 14=Germany, 15=Netherlands, 16=Belgium, 17=Spain, 18=Italy, 19=Poland, 20=Hungary, 21=Czech Republic, 22=Romania, 23=Sweden, 24=Greece, 25=Denmark, 26=Iran, 28=Singapore, 29=Japan, 30=China, 31=India, 32=Venezuela, 33=Brazil, 34=Mexico, 35=Nigeria, 36=Kenya, 37=Tanzania, 38=Israel, 39=Palestinian Territories, 40=Ghana, 41=Uganda, 42=Benin, 43=Madagascar, 44=Malawi, 45=South Africa, 46=Canada, 47=Australia, 48=Philippines, 49=Sri Lanka, 50=Vietnam, 51=Thailand, 52=Cambodia, 53=Laos, 54=Myanmar, 55=New Zealand, 57=Botswana, 60=Ethiopia, 61=Mali, 62=Mauritania, 63=Mozambique, 64=Niger, 65=Rwanda, 66=Senegal, 67=Zambia, 68=South Korea, 69=Taiwan, 70=Afghanistan, 71=Belarus, 72=Georgia, 73=Kazakhstan, 74=Kyrgyzstan, 75=Moldova, 76=Russia, 77=Ukraine, 78=Burkina Faso, 79=Cameroon, 80=Sierra Leone, 81=Zimbabwe, 82=Costa Rica, 83=Albania, 84=Algeria, 87=Argentina, 88=Armenia, 89=Austria, 90=Azerbaijan, 96=Bolivia, 97=Bosnia and Herzegovina, 99=Bulgaria, 100=Burundi, 103=Chad, 104=Chile, 105=Colombia, 106=Comoros, 108=Republic of Congo, 109=Croatia, 111=Cyprus, 114=Dominican Republic, 115=Ecuador, 116=El Salvador, 119=Estonia, 121=Finland, 122=Gabon, 124=Guatemala, 125=Guinea, 128=Haiti, 129=Honduras, 130=Iceland, 131=Iraq, 132=Ireland, 134=Ivory Coast, 137=Kuwait, 138=Latvia, 140=Liberia, 141=Libya, 143=Lithuania, 144=Luxembourg, 145=Macedonia, 146=Malaysia, 148=Malta, 150=Mauritius, 153=Mongolia, 154=Montenegro, 155=Namibia, 157=Nepal, 158=Nicaragua, 160=Norway, 163=Panama, 164=Paraguay, 165=Peru, 166=Portugal, 173=Serbia, 175=Slovakia, 176=Slovenia, 183=Eswatini, 184=Switzerland, 185=Tajikistan, 186=The Gambia, 187=Togo, 190=Tunisia, 191=Turkmenistan, 193=United Arab Emirates, 194=Uruguay, 195=Uzbekistan, 197=Yemen, 198=Kosovo, 202=Northern Cyprus" %>%
str_split(pattern = ", ") %>%
.[[1]] %>%
str_match("(.+?)=(.+)")
wgm2018$country = mapvalues(wgm2018$WP5, from = country_codes[, 2], to = country_codes[, 3])
#sample sizes
wgm2018$country %>% table2()
#NIQ collection
gs4_deauth()
NIQ = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit#gid=0") %>%
df_legalize_names()
## ✔ Reading from "National IQ datasets".
## ✔ Range 'Sheet1'.
Analysis
sci_items = wgm2018 %>%
select(Q2, Q3, Q4, Q23) %>%
map_df(~mapvalues(., from = c(98, 99), to = c(NA, NA)))
#reverse one item
sci_items$Q4 = 1 - sci_items$Q4
#rename
sci_items %<>% set_colnames(c("understand_science", "diseases", "poetry", "vaccine"))
#cors
sci_items %>%
mixedCor()
## Call: mixedCor(data = .)
## undr_ disss potry vaccn
## understand_science 1.00
## diseases 0.33 1.00
## poetry 0.14 -0.23 1.00
## vaccine 0.29 0.28 0.06 1.00
#fit IRT models
sci4 = mirt(
sci_items,
model = 1,
itemtype = c("graded", "2PL", "2PL", "2PL"),
technical = list(NCYCLES = 10e3),
verbose = F
)
## Warning: data contains response patterns with only NAs
sci4 %>% summary()
## F1 h2
## understand_science 0.816 0.6663
## diseases 0.469 0.2204
## poetry 0.137 0.0187
## vaccine 0.419 0.1755
##
## SS loadings: 1.081
## Proportion Var: 0.27
##
## Factor correlations:
##
## F1
## F1 1
#no poetry
sci3 = mirt(
sci_items[, -3],
model = 1,
itemtype = c("graded", "2PL", "2PL"),
technical = list(NCYCLES = 10e3),
verbose = F
)
## Warning: data contains response patterns with only NAs
sci3 %>% summary()
## F1 h2
## understand_science 0.609 0.371
## diseases 0.609 0.371
## vaccine 0.526 0.277
##
## SS loadings: 1.019
## Proportion Var: 0.34
##
## Factor correlations:
##
## F1
## F1 1
#plot loadings
extract_item_data(
list(sci4, sci3)
)
#scores
sci4_scores = fscores(sci4, full.scores = T, full.scores.SE = T)
sci3_scores = fscores(sci3, full.scores = T, full.scores.SE = T)
#reliability
sci4_scores %>% empirical_rxx()
## F1
## 0.5691699
sci3_scores %>% empirical_rxx()
## F1
## 0.3818689
#add back to main
wgm2018$sci4 = sci4_scores[, 1] * -1
wgm2018$sci3 = sci3_scores[, 1] * -1
#alternative 3 item model
sci3b = mirt(
sci_items[, -1],
model = 1,
itemtype = c("2PL", "2PL", "2PL"),
technical = list(NCYCLES = 10e3),
verbose = F
)
## Warning: data contains response patterns with only NAs
sci3b %>% summary()
## F1 h2
## diseases 0.970 0.9406
## poetry -0.207 0.0427
## vaccine 0.291 0.0847
##
## SS loadings: 1.068
## Proportion Var: 0.356
##
## Factor correlations:
##
## F1
## F1 1
sci3b_scores = fscores(sci3b, full.scores = T, full.scores.SE = T)
#reliability
sci3b_scores %>% empirical_rxx()
## F1
## 0.2875882
#add back to main
wgm2018$sci3b = sci3b_scores[, 1] * -1
#score correlation
GG_scatter(wgm2018, "sci4", "sci3")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/sci4 vs. sci3.png")
## `geom_smooth()` using formula = 'y ~ x'
#IQs based on British norms
wgm2018$sci4_IQ = wgm2018$sci4 %>% standardize(focal_group = (wgm2018$country == "United Kingdom")) %>%
multiply_by(15) %>%
add(100)
#by country
countries = wgm2018 %>%
group_by(country) %>%
summarise(
sci4 = mean(sci4, na.rm = T),
sci3 = mean(sci3, na.rm = T),
sci3b = mean(sci3b, na.rm = T),
sci4_IQ = mean(sci4_IQ, na.rm = T),
understand_science = mean(Q2==1, na.rm = T),
diseases = mean(Q3==1, na.rm = T),
poetry = mean(1 - Q4, na.rm = T),
vaccine = mean(Q23==1, na.rm = T),
)
#fix congo name
countries$country = case_when(
(countries$country == "Republic of Congo") ~ "Congo-Brazzaville",
.default = countries$country
)
countries$ISO = pu_translate(countries$country)
## No exact match: Palestinian Territories
## No exact match: Congo-Brazzaville
## Best fuzzy match found: Palestinian Territories -> Palestinian territories with distance 1.00
## Best fuzzy match found: Congo-Brazzaville -> Congo (Brazzaville) with distance 3.00
d = full_join(
countries,
NIQ,
by = "ISO"
)
d %>%
GG_scatter("sci4", "sci3", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci4 vs. sci3.png")
## `geom_smooth()` using formula = 'y ~ x'
d %>%
GG_scatter("IQ_6datasets", "sci4", case_names = "country") +
ylab("Science knowledge (4 items, WGM 2018)") +
xlab("Average IQ based on 6 compilations")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci4 vs. NIQ6.png")
## `geom_smooth()` using formula = 'y ~ x'
d %>%
GG_scatter("IQ_6datasets", "sci3", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

d %>% select(
sci3, sci4, Lynn2012, Rindermann2018, Becker2019, Lim2018, Woess2022, Altinok2023, IQ_6datasets
) %>% wtd.cors()
## sci3 sci4 Lynn2012 Rindermann2018 Becker2019
## sci3 1.0000000 0.9911333 0.6308641 0.6717284 0.6156519
## sci4 0.9911333 1.0000000 0.6505984 0.6937925 0.6384040
## Lynn2012 0.6308641 0.6505984 1.0000000 0.9784505 0.8738381
## Rindermann2018 0.6717284 0.6937925 0.9784505 1.0000000 0.8787456
## Becker2019 0.6156519 0.6384040 0.8738381 0.8787456 1.0000000
## Lim2018 0.6528476 0.6612966 0.8251889 0.8448315 0.8143591
## Woess2022 0.7021948 0.7237265 0.9111668 0.9211480 0.8816955
## Altinok2023 0.6741595 0.6944457 0.8907307 0.9168105 0.8569848
## IQ_6datasets 0.6855297 0.7061816 0.9609571 0.9709506 0.9353064
## Lim2018 Woess2022 Altinok2023 IQ_6datasets
## sci3 0.6528476 0.7021948 0.6741595 0.6855297
## sci4 0.6612966 0.7237265 0.6944457 0.7061816
## Lynn2012 0.8251889 0.9111668 0.8907307 0.9609571
## Rindermann2018 0.8448315 0.9211480 0.9168105 0.9709506
## Becker2019 0.8143591 0.8816955 0.8569848 0.9353064
## Lim2018 1.0000000 0.8683098 0.9317750 0.9198851
## Woess2022 0.8683098 1.0000000 0.9241958 0.9593419
## Altinok2023 0.9317750 0.9241958 1.0000000 0.9632570
## IQ_6datasets 0.9198851 0.9593419 0.9632570 1.0000000
d %>%
GG_scatter("IQ_6datasets", "sci4_IQ", case_names = "country")
## `geom_smooth()` using formula = 'y ~ x'

#by item
p1 = d %>%
GG_scatter("IQ_6datasets", "understand_science", case_names = "country") +
xlab("Average IQ based on 6 compilations")
p2 = d %>%
GG_scatter("IQ_6datasets", "diseases", case_names = "country") +
xlab("Average IQ based on 6 compilations")
p3 = d %>%
GG_scatter("IQ_6datasets", "poetry", case_names = "country") +
xlab("Average IQ based on 6 compilations")
p4 = d %>%
GG_scatter("IQ_6datasets", "vaccine", case_names = "country") +
xlab("Average IQ based on 6 compilations")
(p1 + p2) /
(p3 + p4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/NIQ vs. items.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
d %>%
GG_scatter("IQ_6datasets", "sci3b", case_names = "country") +
ylab("Science knowledge (3 items [diseases, poetry, vaccine], WGM 2018)") +
xlab("Average IQ based on 6 compilations")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/countries sci3b vs. NIQ6.png")
## `geom_smooth()` using formula = 'y ~ x'