Init

options(
  digits = 3
)

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  googlesheets4,
  readxl,
  patchwork,
  lavaan,
  tidySEM
)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov
## 
## Loading required package: OpenMx
## To take full advantage of multiple cores, use:
##   mxOption(key='Number of Threads', value=parallel::detectCores()) #now
##   Sys.setenv(OMP_NUM_THREADS=parallel::detectCores()) #before library(OpenMx)
## 
## Attaching package: 'OpenMx'
## 
## The following object is masked from 'package:psych':
## 
##     tr
## 
## Registered S3 method overwritten by 'tidySEM':
##   method          from  
##   predict.MxModel OpenMx
theme_set(theme_bw())

Data

#read the gap data from Google Sheets
gs4_deauth()
d_gaps = read_sheet("https://docs.google.com/spreadsheets/d/1ef6ieSnS7k-jpKjV1QgZ4XfynS7xiJfIE-LVNE4RaoM/edit#gid=0") %>% 
  mutate(ISO = pu_translate(Country)) %>% 
  df_legalize_names()
## ✔ Reading from "Norwegian math test gaps".
## ✔ Range 'scores'.
#Becker data
becker = readxl::read_excel("data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx", sheet = 2L, range = "A2:N205") %>% 
  df_legalize_names()
## New names:
## • `` -> `...1`
## • `` -> `...2`
#fix the first two column names, which are broken in the source file
names(becker)[1:2] = c("ISO_orig", "Country")

#fix the US Virgin Islands country name
becker$Country[200] = "US Virgin Islands"

#redo the ISO codes to avoid matching errors
becker %<>% mutate(
  ISO = pu_translate(Country)
)
## No exact match: Central African Rep.
## No exact match: Korea, North
## No exact match: Saint Helena, Ascension, and Tristan da Cunha
## No exact match: US Virgin Islands
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00
## Best fuzzy match found: US Virgin Islands -> U.S. Virgin Islands with distance 2.00
#join the gap data with the Becker dataset by ISO code
d = inner_join(d_gaps, becker %>% select(-Country, -ISO_orig), by = "ISO")
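
As an optional sanity check, an anti join shows which countries in the gap data lack a match in the Becker data (a minimal sketch using the same ISO key):

#countries in the gap data with no Becker match
anti_join(d_gaps, becker, by = "ISO") %>% pull(Country)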

Analysis

#group means with approximate 95% CIs (SE approximated as 1/sqrt(n))
bind_rows(
  tibble(
    group = "two foreign parents",
    math = d$Full_foreign_d,
    n = d$Full_foreign_n,
    country = d$Country
  ),
  tibble(
    group = "one foreign parent",
    math = d$Half_foreign_d,
    n = d$Half_foreign_n,
    country = d$Country
  )
) %>% 
  mutate(
    country_ordered = fct_reorder(country, math),
    CI_upper = math + 1.96 * 1/sqrt(n),
    CI_lower = math - 1.96 * 1/sqrt(n)
  ) %>% 
  ggplot(aes(math, country_ordered, color = group)) +
  geom_point() +
  geom_errorbarh(mapping = aes(xmin = CI_lower, xmax = CI_upper), alpha = .3, height = 0) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  scale_y_discrete("Origin") +
  scale_x_continuous("Mathematical score relative to Norwegians (0)")

GG_save("figs/math_scores.png")

#scatterplots of the gaps against national IQ, weighted by sqrt(n)
GG_scatter(d, "R", "Full_foreign_d", weights = d$Full_foreign_n %>% sqrt(), case_names = "Country") +
  ylab("Mean math score (z-score)") +
  xlab("National intelligence (Rindermann)") ->
  p_full

p_full
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/full_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d, "R", "Half_foreign_d", weights = d$Half_foreign_n %>% sqrt(), case_names = "Country") +
  ylab("Mean math score (z-score)") +
  xlab("National intelligence (Rindermann)") ->
  p_half

p_half
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/half_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
#confirm the plotted correlations numerically (weighted by sqrt(n))
wtd.cor(x = d$Full_foreign_d, y = d$R, weight = d$Full_foreign_n %>% sqrt())
##   correlation std.err t.value  p.value
## Y       0.584   0.124    4.72 2.49e-05
wtd.cor(x = d$Half_foreign_d, y = d$R, weight = d$Half_foreign_n %>% sqrt())
##   correlation std.err t.value  p.value
## Y       0.643   0.117    5.51 1.87e-06
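
For comparison, the unweighted versions of the same correlations can be checked with base R (optional):

#unweighted correlations for comparison
cor.test(d$Full_foreign_d, d$R)
cor.test(d$Half_foreign_d, d$R)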
#combine plots
p_full + p_half
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/both_gaps.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#average the two gaps (after z-scoring) for more precision
d$z_score_gaps = d %>% select(Full_foreign_d, Half_foreign_d) %>% df_standardize() %>% rowMeans()
d$combined_sample_size = d$Half_foreign_n + d$Full_foreign_n
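
The composite assumes df_standardize z-scores each column before averaging; a base-R equivalent for reference:

#base R equivalent: z-score each gap, then average rowwise
rowMeans(scale(d[, c("Full_foreign_d", "Half_foreign_d")]))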

GG_scatter(d, "R", "z_score_gaps", case_names = "Country", weights = d$combined_sample_size %>% sqrt()) +
  ylab("Mean math score, full and half combined (z-score)") +
  xlab("National intelligence (Rindermann)")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/combined_gaps.png")
## `geom_smooth()` using formula = 'y ~ x'
#overall correlation matrix, without and then with p values
d %>% select(Full_foreign_d, Half_foreign_d, z_score_gaps, R, L_and_V12plusGEO, QNWplusSASplusGEO) %>% wtd.cors()
##                   Full_foreign_d Half_foreign_d z_score_gaps     R
## Full_foreign_d             1.000          0.714        0.926 0.683
## Half_foreign_d             0.714          1.000        0.926 0.582
## z_score_gaps               0.926          0.926        1.000 0.683
## R                          0.683          0.582        0.683 1.000
## L_and_V12plusGEO           0.712          0.564        0.690 0.973
## QNWplusSASplusGEO          0.682          0.576        0.680 0.961
##                   L_and_V12plusGEO QNWplusSASplusGEO
## Full_foreign_d               0.712             0.682
## Half_foreign_d               0.564             0.576
## z_score_gaps                 0.690             0.680
## R                            0.973             0.961
## L_and_V12plusGEO             1.000             0.946
## QNWplusSASplusGEO            0.946             1.000
d %>% select(Full_foreign_d, Half_foreign_d, z_score_gaps, R, L_and_V12plusGEO, QNWplusSASplusGEO) %>% cor_matrix(p_val = T)
##                   Full_foreign_d Half_foreign_d z_score_gaps R        
## Full_foreign_d    "1"            "0.71***"      "0.93***"    "0.68***"
## Half_foreign_d    "0.71***"      "1"            "0.93***"    "0.58***"
## z_score_gaps      "0.93***"      "0.93***"      "1"          "0.68***"
## R                 "0.68***"      "0.58***"      "0.68***"    "1"      
## L_and_V12plusGEO  "0.71***"      "0.56***"      "0.69***"    "0.97***"
## QNWplusSASplusGEO "0.68***"      "0.58***"      "0.68***"    "0.96***"
##                   L_and_V12plusGEO QNWplusSASplusGEO
## Full_foreign_d    "0.71***"        "0.68***"        
## Half_foreign_d    "0.56***"        "0.58***"        
## z_score_gaps      "0.69***"        "0.68***"        
## R                 "0.97***"        "0.96***"        
## L_and_V12plusGEO  "1"              "0.95***"        
## QNWplusSASplusGEO "0.95***"        "1"
GG_save("figs/combined_gap.png")
## `geom_smooth()` using formula = 'y ~ x'
#SEM: latent math gap regressed on a latent national IQ factor (G)
model = "
G =~ QNWplusSASplusGEO + L_and_V12 + R
math =~ Full_foreign_d + Half_foreign_d
math ~ G"

d_std = d %>% select(Half_foreign_d, Full_foreign_d, QNWplusSASplusGEO, L_and_V12, R, combined_sample_size) %>% df_standardize(exclude = "combined_sample_size")
## Skipped combined_sample_size because it is in the exclude list
sem_fit = cfa(
  model,
  data = d_std,
  std.ov = T,
  std.lv = T
)

sem_fit
## lavaan 0.6.15 ended normally after 31 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##                                                   Used       Total
##   Number of observations                            41          45
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 4.595
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.331
sem_fit %>% summary()
## lavaan 0.6.15 ended normally after 31 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##                                                   Used       Total
##   Number of observations                            41          45
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 4.595
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.331
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   G =~                                                
##     QNWplsSASplGEO    0.958    0.112    8.527    0.000
##     L_and_V12         0.968    0.111    8.699    0.000
##     R                 0.980    0.110    8.914    0.000
##   math =~                                             
##     Full_foreign_d    0.587    0.116    5.065    0.000
##     Half_foreign_d    0.499    0.098    5.081    0.000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   math ~                                              
##     G                 1.165    0.308    3.783    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .QNWplsSASplGEO    0.058    0.016    3.693    0.000
##    .L_and_V12         0.038    0.012    3.073    0.002
##    .R                 0.014    0.010    1.462    0.144
##    .Full_foreign_d    0.164    0.118    1.389    0.165
##    .Half_foreign_d    0.389    0.118    3.290    0.001
##     G                 1.000                           
##    .math              1.000
parameterestimates(sem_fit, standardized = T)
fitmeasures(sem_fit)
##                  npar                  fmin                 chisq 
##                11.000                 0.056                 4.595 
##                    df                pvalue        baseline.chisq 
##                 4.000                 0.331               290.589 
##           baseline.df       baseline.pvalue                   cfi 
##                10.000                 0.000                 0.998 
##                   tli                  nnfi                   rfi 
##                 0.995                 0.995                 0.960 
##                   nfi                  pnfi                   ifi 
##                 0.984                 0.394                 0.998 
##                   rni                  logl     unrestricted.logl 
##                 0.998              -145.354              -143.057 
##                   aic                   bic                ntotal 
##               312.709               331.558                41.000 
##                  bic2                 rmsea        rmsea.ci.lower 
##               297.124                 0.060                 0.000 
##        rmsea.ci.upper        rmsea.ci.level          rmsea.pvalue 
##                 0.250                 0.900                 0.384 
##        rmsea.close.h0 rmsea.notclose.pvalue     rmsea.notclose.h0 
##                 0.050                 0.538                 0.080 
##                   rmr            rmr_nomean                  srmr 
##                 0.009                 0.009                 0.009 
##          srmr_bentler   srmr_bentler_nomean                  crmr 
##                 0.009                 0.009                 0.011 
##           crmr_nomean            srmr_mplus     srmr_mplus_nomean 
##                 0.011                 0.009                 0.009 
##                 cn_05                 cn_01                   gfi 
##                85.657               119.465                 0.960 
##                  agfi                  pgfi                   mfi 
##                 0.850                 0.256                 0.993 
##                  ecvi 
##                 0.649
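
The headline fit indices and standardized estimates can also be pulled out compactly with lavaan's own extractors (optional):

#headline fit indices only
fitmeasures(sem_fit, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))

#standardized estimates with standard errors and CIs
standardizedSolution(sem_fit)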
#slope of half-foreign gaps on full-foreign gaps (weighted)
lm(Half_foreign_d ~ Full_foreign_d, data = d, weights = sqrt(d$combined_sample_size)) %>% summary()
## 
## Call:
## lm(formula = Half_foreign_d ~ Full_foreign_d, data = d, weights = sqrt(d$combined_sample_size))
## 
## Weighted Residuals:
##    Min     1Q Median     3Q    Max 
## -1.679 -0.155 -0.002  0.202  0.980 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.0364     0.0130   -2.80   0.0076 ** 
## Full_foreign_d   0.3281     0.0434    7.56    2e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.455 on 43 degrees of freedom
## Multiple R-squared:  0.57,   Adjusted R-squared:  0.56 
## F-statistic: 57.1 on 1 and 43 DF,  p-value: 2.03e-09
#SEM plot
graph_sem(
  sem_fit, 
  edges = get_edges(sem_fit) %>% filter(arrow != "both") %>% mutate(label = est_std),
  # layout = get_layout(sem_fit, layout_algorithm = "layout_nicely"),
  layout = matrix(c(NA, "L_and_V12", "R", "QNWplusSASplusGEO", NA,
                    NA, NA, "G", NA, NA, 
                    NA, NA, "math", NA, NA, 
                    NA, "Full_foreign_d", NA, "Half_foreign_d", NA), 
                  nrow = 5),
  nodes = get_nodes(sem_fit) %>% mutate(label = c("G", "math", "Becker IQs", "Lynn IQs", "Rindermann IQs", "2 foreign parents", "1 foreign parent"))
  )

GG_save("figs/sem.png")

Meta

write_sessioninfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidySEM_0.2.4         OpenMx_2.21.8         lavaan_0.6-15        
##  [4] patchwork_1.1.2       readxl_1.4.2          googlesheets4_1.1.0  
##  [7] kirkegaard_2023-06-27 psych_2.3.3           assertthat_0.2.1     
## [10] weights_1.0.4         Hmisc_5.0-1           magrittr_2.0.3       
## [13] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [16] dplyr_1.1.2           purrr_1.0.1           readr_2.1.4          
## [19] tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2        
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.14       jsonlite_1.8.4        farver_2.1.1         
##   [4] nloptr_2.0.3          rmarkdown_2.21        ragg_1.2.5           
##   [7] fs_1.6.2              vctrs_0.6.2           minqa_1.2.5          
##  [10] CompQuadForm_1.4.3    base64enc_0.1-3       htmltools_0.5.5      
##  [13] curl_5.0.0            broom_1.0.4           cellranger_1.1.0     
##  [16] Formula_1.2-5         parallelly_1.35.0     sass_0.4.5           
##  [19] StanHeaders_2.26.27   bslib_0.4.2           htmlwidgets_1.6.2    
##  [22] gsubfn_0.7            plyr_1.8.8            sandwich_3.0-2       
##  [25] zoo_1.8-12            cachem_1.0.7          igraph_1.5.0         
##  [28] lifecycle_1.0.3       pkgconfig_2.0.3       Matrix_1.5-1         
##  [31] R6_2.5.1              fastmap_1.1.1         future_1.32.0        
##  [34] digest_0.6.31         colorspace_2.1-0      ps_1.7.5             
##  [37] bain_0.2.8            textshaping_0.3.6     labeling_0.4.2       
##  [40] progressr_0.13.0      fansi_1.0.4           timechange_0.2.0     
##  [43] gdata_2.18.0.1        mgcv_1.8-42           abind_1.4-5          
##  [46] httr_1.4.5            compiler_4.3.0        gargle_1.4.0         
##  [49] withr_2.5.0           pander_0.6.5          htmlTable_2.4.1      
##  [52] backports_1.4.1       inline_0.3.19         blavaan_0.4-8        
##  [55] carData_3.0-5         fastDummies_1.6.3     highr_0.10           
##  [58] pkgbuild_1.4.0        MASS_7.3-58.3         gtools_3.9.4         
##  [61] loo_2.6.0             tools_4.3.0           pbivnorm_0.6.0       
##  [64] MplusAutomation_1.1.0 foreign_0.8-82        googledrive_2.1.0    
##  [67] future.apply_1.10.0   nnet_7.3-18           glue_1.6.2           
##  [70] quadprog_1.5-8        dbscan_1.1-11         callr_3.7.3          
##  [73] nlme_3.1-162          stringdist_0.9.10     grid_4.3.0           
##  [76] checkmate_2.2.0       cluster_2.1.4         generics_0.1.3       
##  [79] gtable_0.3.3          tzdb_0.3.0            data.table_1.14.8    
##  [82] hms_1.1.3             car_3.1-2             utf8_1.2.3           
##  [85] RANN_2.6.1            pillar_1.9.0          splines_4.3.0        
##  [88] lattice_0.20-45       tidyselect_1.2.0      knitr_1.42           
##  [91] gridExtra_2.3         stats4_4.3.0          xfun_0.39            
##  [94] texreg_1.38.6         matrixStats_1.0.0     rstan_2.21.8         
##  [97] proto_1.0.0           stringi_1.7.12        rematch_1.0.1        
## [100] yaml_2.3.7            boot_1.3-28           evaluate_0.20        
## [103] codetools_0.2-19      nonnest2_0.5-5        cli_3.6.1            
## [106] RcppParallel_5.1.7    rpart_4.1.19          systemfonts_1.0.4    
## [109] xtable_1.8-4          munsell_0.5.0         processx_3.8.1       
## [112] jquerylib_0.1.4       Rcpp_1.0.10           globals_0.16.2       
## [115] coda_0.19-4           parallel_4.3.0        rstantools_2.3.1     
## [118] prettyunits_1.1.1     bayesplot_1.10.0      lme4_1.1-33          
## [121] listenv_0.9.0         mvtnorm_1.1-3         scales_1.2.1         
## [124] crayon_1.5.2          tmvnsim_1.0-2         rlang_1.1.0          
## [127] mnormt_2.1.1          mice_3.15.0
write_rds(d, "data/data_out.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all project files
  #overwrite existing files (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}