Init
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
## Loading required package: lattice
##
## Loading required package: survival
##
## Loading required package: Formula
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following objects are masked from 'package:purrr':
##
## is_logical, is_numeric
##
##
## The following object is masked from 'package:base':
##
## +
# extra packages beyond those attached with kirkegaard:
# Google Sheets access, mapping, GISCO boundaries, Excel import
load_packages(googlesheets4, tmap, giscoR, readxl)

# session defaults: black-and-white ggplot theme, 3 significant digits in printed output
theme_set(theme_bw())
options(digits = 3)
Data
#map data
# country polygons from GISCO in EPSG:4326 (WGS84), resolution code "10"
world <- gisco_get_countries(epsg = 4326, resolution = "10")
#national basic skills data (Woessman 2022 workbook, local Excel copy)
# disabled alternative: read the data from Google Sheets instead
# gs4_auth("the.dfx@gmail.com")
# woess = read_sheet("https://docs.google.com/spreadsheets/d/15SymdhLnLX1UP10UbhjrT3ZN7ZPoaySiqwXAigUW3Mw/edit#gid=0")
# ISO is derived from Country before the column names are legalized
# (pu_translate presumably maps country names to ISO codes -- the result is
# later joined against ISO3_CODE of the map data)
woess <- df_legalize_names(
  mutate(
    read_excel("data/National basic skills (Woessman 2022).xlsx"),
    ISO = pu_translate(Country)
  )
)
#becker data
# NIQ dataset v1.3.3: sheet 2, cell range A2:N205 only
# column names are legalized first, then ISO is derived from Country
# NOTE(review): the logged run reports no unique match for "Virgin Islands"
# (several equally good fuzzy matches) -- check what ISO that row ends up with
becker <- read_excel(
  "data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx",
  sheet = 2,
  range = "A2:N205"
) %>%
  df_legalize_names() %>%
  mutate(ISO = pu_translate(Country))
## No exact match: Central African Rep.
## No exact match: Korea, North
## No exact match: Saint Helena, Ascension, and Tristan da Cunha
## No exact match: Virgin Islands
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00
## Warning: There were multiple equally good matches for Virgin Islands: Cayman
## Islands | Faroe Islands | Mariana Islands | Pitcairn Islands | Jarvis Island |
## Midway Islands | U.S. Virgin Islands. All with distance 5.00
#smart fraction data, used only for its UN macroregion classification
smartfraction <- read_rds("data/smart_fraction_data_out.rds")
Merge
# keep every country that appears in either source, then attach the
# UN macroregion from the smart fraction data where an ISO match exists
d <- woess %>%
  full_join(becker, by = "ISO") %>%
  left_join(select(smartfraction, ISO, UN_macroregion), by = "ISO")

# the joins must not have fanned out: one row per ISO code
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
#rename
# shorter handle for the L_and_V12plusGEO column used below
d <- rename(d, Lynn2012 = L_and_V12plusGEO)
Analysis
#inverse normal transform from their enrollment corrected estimates
# qnorm(p, lower.tail = FALSE) returns the z whose upper-tail probability is p,
# so a larger below-basic share gives a lower z-score.
# NOTE(review): assumes the column is a proportion in (0, 1), not a percentage --
# confirm; 0 and 1 map to +Inf / -Inf and values outside [0, 1] to NaN.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
d <- d %>% mutate(
  basic_skills_z = qnorm(Estimated_below_basic_of_all_students, lower.tail = FALSE)
)
#rescale basic skills z-scores to the IQ scale
#use Lynn as basis, find overlap set (rows with no missing value in the selected columns)
Lynn_bs_overlap <- d %>% select(Lynn2012, basic_skills_z, ISO) %>% na.omit()
# printed for inspection; the moments used below are computed from the columns
# by name, so the result does not depend on row order in this summary table
(Lynn_bs_overlap_sumstats <- Lynn_bs_overlap %>% describe2())

# mean / SD of both measures within the overlap set
lynn_mean <- mean(Lynn_bs_overlap$Lynn2012)
lynn_sd <- sd(Lynn_bs_overlap$Lynn2012)
bs_mean <- mean(Lynn_bs_overlap$basic_skills_z)
bs_sd <- sd(Lynn_bs_overlap$basic_skills_z)

# fail loudly instead of filling the column with NA/NaN/Inf when the overlap
# is too small or contains non-finite z-scores
stopifnot(
  nrow(Lynn_bs_overlap) >= 2,
  is.finite(lynn_mean), is.finite(lynn_sd),
  is.finite(bs_mean), is.finite(bs_sd), bs_sd > 0
)

#rescale
# standardize on the overlap set's basic-skills moments, then map onto the
# overlap set's Lynn2012 mean and SD
d$basic_skill_mean_IQ <- lynn_mean + (d$basic_skills_z - bs_mean) / bs_sd * lynn_sd
#regional means
# summary statistics of both IQ measures by UN macroregion, reshaped to one
# row per variable and one column per region
# NOTE(review): the min/max warnings logged for this call presumably come from
# regions where a variable has no non-missing values -- confirm
d %>%
  select(Lynn2012, basic_skill_mean_IQ) %>%
  describe2(d$UN_macroregion) %>%
  select(group, var, mean) %>%
  pivot_wider(names_from = group, values_from = mean)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
Maps
# give the map layer the same key name as the data, then attach the data;
# countries without a matching row in d keep their polygon with NA values
world$ISO <- world$ISO3_CODE
d_map <- left_join(world, d, by = "ISO")

# whole-number version of the estimate, used for the text labels
d_map$basic_skill_mean_IQ_rounded <- round(d_map$basic_skill_mean_IQ)

# fill countries by the rescaled estimate and print the rounded value on top
map <- tm_shape(d_map) +
  tm_fill("basic_skill_mean_IQ", title = "Basic skills, in IQ's") +
  tm_text("basic_skill_mean_IQ_rounded", size = 0.5)
map

# write the map to disk
tmap_save(map, "figs/basic_skills_map.png")
## Map saved to /media/emil/8tb_ssd_3/projects/basic_skills_2022/figs/basic_skills_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10.2 by 4.81 inches (300 dpi)