options(
digits = 3
)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
googlesheets4,
readxl,
patchwork,
lavaan,
tidySEM
)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
##
## The following object is masked from 'package:psych':
##
## cor2cov
##
## Loading required package: OpenMx
## To take full advantage of multiple cores, use:
## mxOption(key='Number of Threads', value=parallel::detectCores()) #now
## Sys.setenv(OMP_NUM_THREADS=parallel::detectCores()) #before library(OpenMx)
##
## Attaching package: 'OpenMx'
##
## The following object is masked from 'package:psych':
##
## tr
##
## Registered S3 method overwritten by 'tidySEM':
## method from
## predict.MxModel OpenMx
theme_set(theme_bw())
#read from sheets
gs4_deauth()
d_gaps = read_sheet("https://docs.google.com/spreadsheets/d/1ef6ieSnS7k-jpKjV1QgZ4XfynS7xiJfIE-LVNE4RaoM/edit#gid=0") %>%
mutate(ISO = pu_translate(Country)) %>%
df_legalize_names()
## ✔ Reading from "Norwegian math test gaps".
## ✔ Range 'scores'.
#Becker data
becker = readxl::read_excel("data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx", sheet = 2L, range = "A2:N205") %>%
df_legalize_names()
## New names:
## • `` -> `...1`
## • `` -> `...2`
#fix names somehow broken
names(becker)[1:2] = c("ISO_orig", "Country")
#US virgin islands
becker$Country[200] = "US Virgin Islands"
#redo ISO to avoid errors
becker %<>% mutate(
ISO = pu_translate(Country)
)
## No exact match: Central African Rep.
## No exact match: Korea, North
## No exact match: Saint Helena, Ascension, and Tristan da Cunha
## No exact match: US Virgin Islands
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00
## Best fuzzy match found: US Virgin Islands -> U.S. Virgin Islands with distance 2.00
#join
d = inner_join(d_gaps, becker %>% select(-Country, -ISO_orig), by = "ISO")
#means including estimated CIs
bind_rows(
tibble(
group = "two foreign parents",
math = d$Full_foreign_d,
n = d$Full_foreign_n,
country = d$Country
),
tibble(
group = "one foreign parent",
math = d$Half_foreign_d,
n = d$Half_foreign_n,
country = d$Country
)
) %>%
mutate(
country_ordered = fct_reorder(country, math),
CI_upper = math + 1.96 * 1/sqrt(n),
CI_lower = math - 1.96 * 1/sqrt(n)
) %>%
ggplot(aes(math, country_ordered, color = group)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = CI_lower, xmax = CI_upper), alpha = .3, height = 0) +
geom_vline(xintercept = 0, linetype = "dotted") +
scale_y_discrete("Origin") +
scale_x_continuous("Mathematical score relative to Norwegians (0)")
GG_save("figs/math_scores.png")
#simple plots
GG_scatter(d, "R", "Full_foreign_d", weights = d$Full_foreign_n %>% sqrt(), case_names = "Country") +
ylab("Mean math score (z-score)") +
xlab("National intelligence (Rindermann)") ->
p_full
p_full
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/full_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d, "R", "Half_foreign_d", weights = d$Half_foreign_n %>% sqrt(), case_names = "Country") +
ylab("Mean math score (z-score)") +
xlab("National intelligence (Rindermann)") ->
p_half
p_half
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/half_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
#confirm numerics
wtd.cor(x = d$Full_foreign_d, y = d$R, weight = d$Full_foreign_n %>% sqrt())
## correlation std.err t.value p.value
## Y 0.584 0.124 4.72 2.49e-05
wtd.cor(x = d$Half_foreign_d, y = d$R, weight = d$Half_foreign_n %>% sqrt())
## correlation std.err t.value p.value
## Y 0.643 0.117 5.51 1.87e-06
#combine plots
p_full + p_half
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/both_gaps.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#average for more precision
d$z_score_gaps = d %>% select(Full_foreign_d, Half_foreign_d) %>% df_standardize() %>% rowMeans()
d$combined_sample_size = d$Half_foreign_n + d$Full_foreign_n
GG_scatter(d, "R", "z_score_gaps", case_names = "Country", weights = d$combined_sample_size %>% sqrt()) +
ylab("Mean math score, full and half combined (z-score)") +
xlab("National intelligence (Rindermann)")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/combined_gaps.png")
## `geom_smooth()` using formula = 'y ~ x'
#numbers
#overall correlation matrix
d %>% select(Full_foreign_d, Half_foreign_d, z_score_gaps, R, L_and_V12plusGEO, QNWplusSASplusGEO) %>% wtd.cors()
## Full_foreign_d Half_foreign_d z_score_gaps R
## Full_foreign_d 1.000 0.714 0.926 0.683
## Half_foreign_d 0.714 1.000 0.926 0.582
## z_score_gaps 0.926 0.926 1.000 0.683
## R 0.683 0.582 0.683 1.000
## L_and_V12plusGEO 0.712 0.564 0.690 0.973
## QNWplusSASplusGEO 0.682 0.576 0.680 0.961
## L_and_V12plusGEO QNWplusSASplusGEO
## Full_foreign_d 0.712 0.682
## Half_foreign_d 0.564 0.576
## z_score_gaps 0.690 0.680
## R 0.973 0.961
## L_and_V12plusGEO 1.000 0.946
## QNWplusSASplusGEO 0.946 1.000
d %>% select(Full_foreign_d, Half_foreign_d, z_score_gaps, R, L_and_V12plusGEO, QNWplusSASplusGEO) %>% cor_matrix(p_val = T)
## Full_foreign_d Half_foreign_d z_score_gaps R
## Full_foreign_d "1" "0.71***" "0.93***" "0.68***"
## Half_foreign_d "0.71***" "1" "0.93***" "0.58***"
## z_score_gaps "0.93***" "0.93***" "1" "0.68***"
## R "0.68***" "0.58***" "0.68***" "1"
## L_and_V12plusGEO "0.71***" "0.56***" "0.69***" "0.97***"
## QNWplusSASplusGEO "0.68***" "0.58***" "0.68***" "0.96***"
## L_and_V12plusGEO QNWplusSASplusGEO
## Full_foreign_d "0.71***" "0.68***"
## Half_foreign_d "0.56***" "0.58***"
## z_score_gaps "0.69***" "0.68***"
## R "0.97***" "0.96***"
## L_and_V12plusGEO "1" "0.95***"
## QNWplusSASplusGEO "0.95***" "1"
GG_save("figs/combined_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
#SEM
model = "
G =~ QNWplusSASplusGEO + L_and_V12 + R
math =~ Full_foreign_d + Half_foreign_d
math ~ G"
d_std = d %>% select(Half_foreign_d, Full_foreign_d, QNWplusSASplusGEO, L_and_V12, R, combined_sample_size) %>% df_standardize(exclude = "combined_sample_size")
## Skipped combined_sample_size because it is in the exclude list
sem_fit = cfa(
model,
data = d_std,
std.ov = T,
std.lv = T
)
sem_fit
## lavaan 0.6.15 ended normally after 31 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Used Total
## Number of observations 41 45
##
## Model Test User Model:
##
## Test statistic 4.595
## Degrees of freedom 4
## P-value (Chi-square) 0.331
sem_fit %>% summary()
## lavaan 0.6.15 ended normally after 31 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Used Total
## Number of observations 41 45
##
## Model Test User Model:
##
## Test statistic 4.595
## Degrees of freedom 4
## P-value (Chi-square) 0.331
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## G =~
## QNWplsSASplGEO 0.958 0.112 8.527 0.000
## L_and_V12 0.968 0.111 8.699 0.000
## R 0.980 0.110 8.914 0.000
## math =~
## Full_foreign_d 0.587 0.116 5.065 0.000
## Half_foreign_d 0.499 0.098 5.081 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## math ~
## G 1.165 0.308 3.783 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .QNWplsSASplGEO 0.058 0.016 3.693 0.000
## .L_and_V12 0.038 0.012 3.073 0.002
## .R 0.014 0.010 1.462 0.144
## .Full_foreign_d 0.164 0.118 1.389 0.165
## .Half_foreign_d 0.389 0.118 3.290 0.001
## G 1.000
## .math 1.000
parameterestimates(sem_fit, standardized = T)
fitmeasures(sem_fit)
## npar fmin chisq
## 11.000 0.056 4.595
## df pvalue baseline.chisq
## 4.000 0.331 290.589
## baseline.df baseline.pvalue cfi
## 10.000 0.000 0.998
## tli nnfi rfi
## 0.995 0.995 0.960
## nfi pnfi ifi
## 0.984 0.394 0.998
## rni logl unrestricted.logl
## 0.998 -145.354 -143.057
## aic bic ntotal
## 312.709 331.558 41.000
## bic2 rmsea rmsea.ci.lower
## 297.124 0.060 0.000
## rmsea.ci.upper rmsea.ci.level rmsea.pvalue
## 0.250 0.900 0.384
## rmsea.close.h0 rmsea.notclose.pvalue rmsea.notclose.h0
## 0.050 0.538 0.080
## rmr rmr_nomean srmr
## 0.009 0.009 0.009
## srmr_bentler srmr_bentler_nomean crmr
## 0.009 0.009 0.011
## crmr_nomean srmr_mplus srmr_mplus_nomean
## 0.011 0.009 0.009
## cn_05 cn_01 gfi
## 85.657 119.465 0.960
## agfi pgfi mfi
## 0.850 0.256 0.993
## ecvi
## 0.649
#slope
lm(Half_foreign_d ~ Full_foreign_d, data = d, weights = sqrt(d$combined_sample_size)) %>% summary()
##
## Call:
## lm(formula = Half_foreign_d ~ Full_foreign_d, data = d, weights = sqrt(d$combined_sample_size))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.679 -0.155 -0.002 0.202 0.980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0364 0.0130 -2.80 0.0076 **
## Full_foreign_d 0.3281 0.0434 7.56 2e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.455 on 43 degrees of freedom
## Multiple R-squared: 0.57, Adjusted R-squared: 0.56
## F-statistic: 57.1 on 1 and 43 DF, p-value: 2.03e-09
#SEM plot
graph_sem(
sem_fit,
edges = get_edges(sem_fit) %>% filter(arrow != "both") %>% mutate(label = est_std),
# layout = get_layout(sem_fit, layout_algorithm = "layout_nicely"),
layout = matrix(c(NA, "L_and_V12", "R", "QNWplusSASplusGEO", NA,
NA, NA, "G", NA, NA,
NA, NA, "math", NA, NA,
NA, "Full_foreign_d", NA, "Half_foreign_d", NA),
nrow = 5),
nodes = get_nodes(sem_fit) %>% mutate(label = c("G", "math", "Becker IQs", "Lynn IQs", "Rindermann IQs", "2 foreign parents", "1 foreign parent"))
)
GG_save("figs/sem.png")
write_sessioninfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidySEM_0.2.4 OpenMx_2.21.8 lavaan_0.6-15
## [4] patchwork_1.1.2 readxl_1.4.2 googlesheets4_1.1.0
## [7] kirkegaard_2023-06-27 psych_2.3.3 assertthat_0.2.1
## [10] weights_1.0.4 Hmisc_5.0-1 magrittr_2.0.3
## [13] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [16] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [19] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [22] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.14 jsonlite_1.8.4 farver_2.1.1
## [4] nloptr_2.0.3 rmarkdown_2.21 ragg_1.2.5
## [7] fs_1.6.2 vctrs_0.6.2 minqa_1.2.5
## [10] CompQuadForm_1.4.3 base64enc_0.1-3 htmltools_0.5.5
## [13] curl_5.0.0 broom_1.0.4 cellranger_1.1.0
## [16] Formula_1.2-5 parallelly_1.35.0 sass_0.4.5
## [19] StanHeaders_2.26.27 bslib_0.4.2 htmlwidgets_1.6.2
## [22] gsubfn_0.7 plyr_1.8.8 sandwich_3.0-2
## [25] zoo_1.8-12 cachem_1.0.7 igraph_1.5.0
## [28] lifecycle_1.0.3 pkgconfig_2.0.3 Matrix_1.5-1
## [31] R6_2.5.1 fastmap_1.1.1 future_1.32.0
## [34] digest_0.6.31 colorspace_2.1-0 ps_1.7.5
## [37] bain_0.2.8 textshaping_0.3.6 labeling_0.4.2
## [40] progressr_0.13.0 fansi_1.0.4 timechange_0.2.0
## [43] gdata_2.18.0.1 mgcv_1.8-42 abind_1.4-5
## [46] httr_1.4.5 compiler_4.3.0 gargle_1.4.0
## [49] withr_2.5.0 pander_0.6.5 htmlTable_2.4.1
## [52] backports_1.4.1 inline_0.3.19 blavaan_0.4-8
## [55] carData_3.0-5 fastDummies_1.6.3 highr_0.10
## [58] pkgbuild_1.4.0 MASS_7.3-58.3 gtools_3.9.4
## [61] loo_2.6.0 tools_4.3.0 pbivnorm_0.6.0
## [64] MplusAutomation_1.1.0 foreign_0.8-82 googledrive_2.1.0
## [67] future.apply_1.10.0 nnet_7.3-18 glue_1.6.2
## [70] quadprog_1.5-8 dbscan_1.1-11 callr_3.7.3
## [73] nlme_3.1-162 stringdist_0.9.10 grid_4.3.0
## [76] checkmate_2.2.0 cluster_2.1.4 generics_0.1.3
## [79] gtable_0.3.3 tzdb_0.3.0 data.table_1.14.8
## [82] hms_1.1.3 car_3.1-2 utf8_1.2.3
## [85] RANN_2.6.1 pillar_1.9.0 splines_4.3.0
## [88] lattice_0.20-45 tidyselect_1.2.0 knitr_1.42
## [91] gridExtra_2.3 stats4_4.3.0 xfun_0.39
## [94] texreg_1.38.6 matrixStats_1.0.0 rstan_2.21.8
## [97] proto_1.0.0 stringi_1.7.12 rematch_1.0.1
## [100] yaml_2.3.7 boot_1.3-28 evaluate_0.20
## [103] codetools_0.2-19 nonnest2_0.5-5 cli_3.6.1
## [106] RcppParallel_5.1.7 rpart_4.1.19 systemfonts_1.0.4
## [109] xtable_1.8-4 munsell_0.5.0 processx_3.8.1
## [112] jquerylib_0.1.4 Rcpp_1.0.10 globals_0.16.2
## [115] coda_0.19-4 parallel_4.3.0 rstantools_2.3.1
## [118] prettyunits_1.1.1 bayesplot_1.10.0 lme4_1.1-33
## [121] listenv_0.9.0 mvtnorm_1.1-3 scales_1.2.1
## [124] crayon_1.5.2 tmvnsim_1.0-2 rlang_1.1.0
## [127] mnormt_2.1.1 mice_3.15.0
write_rds(d, "data/data_out.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}