Init

#options
options(
  digits = 3
)

# library(renv)
#devtools::install_version("broom", "0.5.6")

#packages
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  rms, 
  mirt,
  readxl,
  lavaan,
  ggeffects,
  pscl,
  patchwork
  )
## Loading required package: stats4
## Loading required package: lattice
## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov
## 
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
#set the theme for all plots
theme_set(theme_bw())

Functions

#residual slope to get standardized value
resid_slope = function(x) {sqrt(1-x^2)}
resid_slope(.71)
## [1] 0.704
resid_slope(1)
## [1] 0
resid_slope(0)
## [1] 1
resid_slope(.5)
## [1] 0.866
sqrt(1 - (.5^2))
## [1] 0.866

Data

Read the data from various files.

#datasets
d = read_rds("data/VES merged.rds")
d_vars = read_csv("data/VES_dataset_variables.csv")
## Rows: 3341 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): code, name
## dbl (1): number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
MMPI_items = d %>% select(MM010001:MM010566)
MMPI_questions = read_excel("data/MMPI-1 items.xlsx")
cog_items = read_tsv("data/VES_items_binary.tsv")
## Rows: 4462 Columns: 202
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (202): WAIS_R_INF_ITEM_1, WAIS_R_INF_ITEM_2, WAIS_R_INF_ITEM_3, WAIS_R_I...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ves_more_data = read_excel("data/race_testis_semen_fertility_Emil.xlsx") #broken file
dysgenics2 = readxl::read_excel("data/Aldo_dysgenics_VES.xlsx")

#standardize some variables
std_vars = c("education", "income")
for (v in std_vars) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")

#fertility
d$fertility = dysgenics2$`TOTBRTH1 (TOTAL [ BIRTHS VETERAN FATHERED)`

Analyses

Descriptives

#race distribution
d$race %>% table2()
#age summary stats
d$age %>% describe2()
d$fertility %>% describe2()
d %>% 
  ggplot(aes(fertility)) +
  geom_bar(aes(y = after_stat(count)/sum(after_stat(count)))) +
  scale_x_continuous("Number of children (self-report at age 38)", breaks = seq(0, 100)) +
  scale_y_continuous("Percentage", labels = scales::percent)
## Warning: Removed 7 rows containing non-finite values (`stat_count()`).

GG_save("figs/dist_fertility.png")
## Warning: Removed 7 rows containing non-finite values (`stat_count()`).

Religiousness

Select items, factor analyze score.

#items and nice names
#these were suitable items that found in a search for god, and relig
#Dutton says:
#27;  50;  53;  58;  95;  98;  115;  209;  249;  258;  369;  476;  483;  490;  491
#and, debatably, 106 and 202
#Emil picks
religion_items = c(
  priest_healing = 53,
  prophets_accurate = 58,
  church_almost_every_week = 95,
  believe_in_second_life = 115,
  very_religious = 206,
  belief_in_devil_and_hell = 249,
  belief_in_god = 258,
  only_one_true_religion = 373,
  special_agent_god = 476,
  christ_miracles = 483,
  read_bible_often = 490,
  no_patience_people_true_believers = 491
)

#prevalence (proportion TRUE)
MMPI_questions$prevalence = colMeans(MMPI_items, na.rm = T)
MMPI_questions$missing = miss_by_var(MMPI_items, prop = T)

#our subset
MMPI_questions[religion_items, ]
#copy to main
for (i in seq_along(religion_items)) {
  d[[names(religion_items)[i]]] = MMPI_items[[religion_items[i]]]
}

#religion data
religion_data = MMPI_items[religion_items] %>% 
  set_names(names(religion_items))

#missing
religion_data_miss = miss_by_case(religion_data, reverse = T)

#correlations
religion_data %>% mixedCor()
## Call: mixedCor(data = .)
##                                   prst_ prph_ ch___ bl___ vry_r b____ blf__
## priest_healing                     1.00                                    
## prophets_accurate                  0.57  1.00                              
## church_almost_every_week           0.33  0.51  1.00                        
## believe_in_second_life             0.40  0.53  0.50  1.00                  
## very_religious                     0.39  0.53  0.68  0.44  1.00            
## belief_in_devil_and_hell           0.45  0.63  0.52  0.74  0.46  1.00      
## belief_in_god                      0.38  0.66  0.57  0.82  0.58  0.85  1.00
## only_one_true_religion             0.36  0.59  0.41  0.36  0.52  0.53  0.58
## special_agent_god                  0.34  0.45  0.38  0.35  0.55  0.35  0.34
## christ_miracles                    0.48  0.56  0.55  0.67  0.50  0.70  0.84
## read_bible_often                   0.49  0.69  0.79  0.48  0.76  0.54  0.56
## no_patience_people_true_believers -0.17 -0.29 -0.30 -0.23 -0.27 -0.31 -0.29
##                                   on___ spc__ chrs_ rd_b_
## priest_healing                                           
## prophets_accurate                                        
## church_almost_every_week                                 
## believe_in_second_life                                   
## very_religious                                           
## belief_in_devil_and_hell                                 
## belief_in_god                                            
## only_one_true_religion             1.00                  
## special_agent_god                  0.43  1.00            
## christ_miracles                    0.45  0.33  1.00      
## read_bible_often                   0.57  0.54  0.57  1.00
## no_patience_people_true_believers -0.43 -0.09 -0.25 -0.34
## [1]  1.00
#MIRT
religion_fit = mirt(religion_data, model = 1, technical = list(removeEmptyRows = T))
## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders
## 
Iteration: 1, Log-Lik: -24018.628, Max-Change: 1.29602
Iteration: 2, Log-Lik: -22252.825, Max-Change: 1.15131
Iteration: 3, Log-Lik: -21852.463, Max-Change: 1.10645
Iteration: 4, Log-Lik: -21769.643, Max-Change: 0.38262
Iteration: 5, Log-Lik: -21734.538, Max-Change: 0.40918
Iteration: 6, Log-Lik: -21716.733, Max-Change: 0.41660
Iteration: 7, Log-Lik: -21705.239, Max-Change: 0.24595
Iteration: 8, Log-Lik: -21699.488, Max-Change: 0.16613
Iteration: 9, Log-Lik: -21696.375, Max-Change: 0.08014
Iteration: 10, Log-Lik: -21695.420, Max-Change: 0.06825
Iteration: 11, Log-Lik: -21694.184, Max-Change: 0.06579
Iteration: 12, Log-Lik: -21693.506, Max-Change: 0.03398
Iteration: 13, Log-Lik: -21693.123, Max-Change: 0.01961
Iteration: 14, Log-Lik: -21692.941, Max-Change: 0.01109
Iteration: 15, Log-Lik: -21692.840, Max-Change: 0.00900
Iteration: 16, Log-Lik: -21692.697, Max-Change: 0.00249
Iteration: 17, Log-Lik: -21692.693, Max-Change: 0.00122
Iteration: 18, Log-Lik: -21692.690, Max-Change: 0.00116
Iteration: 19, Log-Lik: -21692.683, Max-Change: 0.00029
Iteration: 20, Log-Lik: -21692.683, Max-Change: 0.00028
Iteration: 21, Log-Lik: -21692.683, Max-Change: 0.00025
Iteration: 22, Log-Lik: -21692.682, Max-Change: 0.00019
Iteration: 23, Log-Lik: -21692.682, Max-Change: 0.00093
Iteration: 24, Log-Lik: -21692.682, Max-Change: 0.00044
Iteration: 25, Log-Lik: -21692.682, Max-Change: 0.00021
Iteration: 26, Log-Lik: -21692.682, Max-Change: 0.00077
Iteration: 27, Log-Lik: -21692.682, Max-Change: 0.00067
Iteration: 28, Log-Lik: -21692.682, Max-Change: 0.00020
Iteration: 29, Log-Lik: -21692.682, Max-Change: 0.00069
Iteration: 30, Log-Lik: -21692.682, Max-Change: 0.00064
Iteration: 31, Log-Lik: -21692.681, Max-Change: 0.00018
Iteration: 32, Log-Lik: -21692.681, Max-Change: 0.00063
Iteration: 33, Log-Lik: -21692.681, Max-Change: 0.00057
Iteration: 34, Log-Lik: -21692.681, Max-Change: 0.00016
Iteration: 35, Log-Lik: -21692.681, Max-Change: 0.00058
Iteration: 36, Log-Lik: -21692.681, Max-Change: 0.00051
Iteration: 37, Log-Lik: -21692.681, Max-Change: 0.00015
Iteration: 38, Log-Lik: -21692.681, Max-Change: 0.00054
Iteration: 39, Log-Lik: -21692.681, Max-Change: 0.00047
Iteration: 40, Log-Lik: -21692.681, Max-Change: 0.00014
Iteration: 41, Log-Lik: -21692.681, Max-Change: 0.00050
Iteration: 42, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 43, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 44, Log-Lik: -21692.681, Max-Change: 0.00046
Iteration: 45, Log-Lik: -21692.681, Max-Change: 0.00042
Iteration: 46, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 47, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 48, Log-Lik: -21692.681, Max-Change: 0.00039
Iteration: 49, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 50, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 51, Log-Lik: -21692.681, Max-Change: 0.00037
Iteration: 52, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 53, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 54, Log-Lik: -21692.681, Max-Change: 0.00036
Iteration: 55, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 56, Log-Lik: -21692.680, Max-Change: 0.00042
Iteration: 57, Log-Lik: -21692.680, Max-Change: 0.00034
Iteration: 58, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 59, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 60, Log-Lik: -21692.680, Max-Change: 0.00032
Iteration: 61, Log-Lik: -21692.680, Max-Change: 0.00010
Iteration: 62, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 63, Log-Lik: -21692.680, Max-Change: 0.00031
Iteration: 64, Log-Lik: -21692.680, Max-Change: 0.00010
religion_fit
## 
## Call:
## mirt(data = religion_data, model = 1, technical = list(removeEmptyRows = T))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 64 EM iterations.
## mirt version: 1.39 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -21693
## Estimated parameters: 24 
## AIC = 43433
## BIC = 43587; SABIC = 43511
religion_fit %>% summary()
##                                       F1    h2
## priest_healing                     0.599 0.359
## prophets_accurate                  0.757 0.573
## church_almost_every_week           0.763 0.582
## believe_in_second_life             0.779 0.607
## very_religious                     0.787 0.619
## belief_in_devil_and_hell           0.830 0.688
## belief_in_god                      0.954 0.909
## only_one_true_religion             0.649 0.421
## special_agent_god                  0.603 0.364
## christ_miracles                    0.811 0.658
## read_bible_often                   0.913 0.834
## no_patience_people_true_believers -0.360 0.130
## 
## SS loadings:  6.74 
## Proportion Var:  0.562 
## 
## Factor correlations: 
## 
##    F1
## F1  1
religion_fit_scores = fscores(religion_fit, full.scores = T, full.scores.SE = T)
d$religiousness = NA
d$religiousness[religion_data_miss != 0] = religion_fit_scores[, 1]
d$religiousness %<>% standardize(focal_group = (d$race == "White"))

#distribution
d$religiousness %>% GG_denhist() +
  scale_x_continuous("Religiousness (factor score)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/religiousness_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#psych package (just for verification)
religion_fit_psych = irt.fa(religion_data)

religion_fit_psych$fa
## Factor Analysis using method =  minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                     MR1   h2   u2 com
## priest_healing                     0.56 0.31 0.69   1
## prophets_accurate                  0.78 0.61 0.39   1
## church_almost_every_week           0.73 0.53 0.47   1
## believe_in_second_life             0.73 0.53 0.47   1
## very_religious                     0.73 0.54 0.46   1
## belief_in_devil_and_hell           0.81 0.65 0.35   1
## belief_in_god                      0.87 0.76 0.24   1
## only_one_true_religion             0.67 0.44 0.56   1
## special_agent_god                  0.53 0.28 0.72   1
## christ_miracles                    0.79 0.62 0.38   1
## read_bible_often                   0.82 0.68 0.32   1
## no_patience_people_true_believers -0.37 0.14 0.86   1
## 
##                 MR1
## SS loadings    6.10
## Proportion Var 0.51
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  66  with the objective function =  9.62 with Chi Square =  42859
## df of  the model are 54  and the objective function was  3.15 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic n.obs is  4462 with the empirical chi square  4638  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  14046  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.6
## RMSEA index =  0.241  and the 90 % confidence intervals are  0.238 0.244
## BIC =  13592
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.89
religion_fit_psych_scores = scoreIrt(religion_fit_psych, religion_data)

d$religiousness2 = religion_fit_psych_scores[, 1]
d$religiousness2 %<>% standardize(focal_group = (d$race == "White"))

#scores match?
cor.test(d$religiousness, d$religiousness2)
## 
##  Pearson's product-moment correlation
## 
## data:  d$religiousness and d$religiousness2
## t = 379, df = 4460, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.984 0.986
## sample estimates:
##   cor 
## 0.985
ggplot(d, aes(religiousness, religiousness2)) +
  geom_count() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#reliability
alpha(religion_data, check.keys=TRUE)
## Warning in alpha(religion_data, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
##  This is indicated by a negative sign for the variable name.
## 
## Reliability analysis   
## Call: alpha(x = religion_data, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.79      0.79     0.8      0.24 3.8 0.0044 0.45 0.22      0.2
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.78  0.79   0.8
## Duhachek  0.78  0.79   0.8
## 
##  Reliability if an item is dropped:
##                                    raw_alpha std.alpha G6(smc) average_r S/N
## priest_healing                          0.79      0.79    0.80      0.26 3.8
## prophets_accurate                       0.76      0.76    0.78      0.23 3.2
## church_almost_every_week                0.77      0.77    0.78      0.23 3.4
## believe_in_second_life                  0.77      0.77    0.78      0.23 3.4
## very_religious                          0.78      0.77    0.78      0.24 3.4
## belief_in_devil_and_hell                0.76      0.76    0.77      0.23 3.2
## belief_in_god                           0.78      0.78    0.78      0.24 3.5
## only_one_true_religion                  0.77      0.77    0.78      0.24 3.4
## special_agent_god                       0.79      0.79    0.80      0.25 3.8
## christ_miracles                         0.77      0.77    0.78      0.23 3.3
## read_bible_often                        0.77      0.77    0.78      0.23 3.3
## no_patience_people_true_believers-      0.80      0.79    0.80      0.26 3.9
##                                    alpha se var.r med.r
## priest_healing                       0.0045 0.014  0.24
## prophets_accurate                    0.0051 0.015  0.19
## church_almost_every_week             0.0048 0.014  0.20
## believe_in_second_life               0.0048 0.013  0.20
## very_religious                       0.0048 0.014  0.20
## belief_in_devil_and_hell             0.0051 0.013  0.20
## belief_in_god                        0.0047 0.012  0.23
## only_one_true_religion               0.0048 0.016  0.19
## special_agent_god                    0.0045 0.014  0.24
## christ_miracles                      0.0049 0.013  0.20
## read_bible_often                     0.0048 0.013  0.20
## no_patience_people_true_believers-   0.0043 0.014  0.24
## 
##  Item statistics 
##                                       n raw.r std.r r.cor r.drop  mean   sd
## priest_healing                     4419  0.37  0.41  0.31   0.27 0.083 0.28
## prophets_accurate                  4292  0.67  0.65  0.61   0.55 0.505 0.50
## church_almost_every_week           4461  0.60  0.59  0.55   0.48 0.250 0.43
## believe_in_second_life             4410  0.60  0.59  0.55   0.48 0.756 0.43
## very_religious                     4449  0.56  0.58  0.53   0.46 0.171 0.38
## belief_in_devil_and_hell           4413  0.68  0.66  0.63   0.57 0.630 0.48
## belief_in_god                      4426  0.51  0.53  0.48   0.42 0.915 0.28
## only_one_true_religion             4423  0.61  0.58  0.53   0.47 0.374 0.48
## special_agent_god                  4442  0.38  0.42  0.32   0.28 0.089 0.28
## christ_miracles                    4328  0.61  0.61  0.57   0.50 0.766 0.42
## read_bible_often                   4452  0.58  0.60  0.57   0.49 0.126 0.33
## no_patience_people_true_believers- 4429  0.40  0.38  0.27   0.25 0.734 0.44
## 
## Non missing response frequency for each item
##                                      0    1 miss
## priest_healing                    0.92 0.08 0.01
## prophets_accurate                 0.50 0.50 0.04
## church_almost_every_week          0.75 0.25 0.00
## believe_in_second_life            0.24 0.76 0.01
## very_religious                    0.83 0.17 0.00
## belief_in_devil_and_hell          0.37 0.63 0.01
## belief_in_god                     0.08 0.92 0.01
## only_one_true_religion            0.63 0.37 0.01
## special_agent_god                 0.91 0.09 0.00
## christ_miracles                   0.23 0.77 0.03
## read_bible_often                  0.87 0.13 0.00
## no_patience_people_true_believers 0.73 0.27 0.01
empirical_rxx(religion_fit_scores)
##    F1 
## 0.836
marginal_rxx(religion_fit)
## [1] 0.834
plot(religion_fit, type = "rxx")

# save_plot_to_file(
#   plot(religion_fit, type = "rxx"), 
#   filename = "figs/religiousness_reliability.png")

#religiousness by race
GG_group_means(d, "religiousness", "race", type = "boxplot")

SMD_matrix(-d$religiousness, d$race, extended_output = T)
## $d
##            White  Black Hispanic  Asian  Native
## White         NA  0.352   0.1544 -0.209  0.0663
## Black     0.3522     NA  -0.1978 -0.561 -0.2859
## Hispanic  0.1544 -0.198       NA -0.364 -0.0881
## Asian    -0.2091 -0.561  -0.3635     NA  0.2755
## Native    0.0663 -0.286  -0.0881  0.275      NA
## 
## $d_string
##          White                Black                 Hispanic             
## White    NA                   "0.35 [0.26 0.44]"    "0.15 [0.01 0.30]"   
## Black    "0.35 [0.26 0.44]"   NA                    "-0.20 [-0.36 -0.03]"
## Hispanic "0.15 [0.01 0.30]"   "-0.20 [-0.36 -0.03]" NA                   
## Asian    "-0.21 [-0.55 0.13]" "-0.56 [-0.91 -0.21]" "-0.36 [-0.73 0.00]" 
## Native   "0.07 [-0.22 0.35]"  "-0.29 [-0.58 0.01]"  "-0.09 [-0.40 0.22]" 
##          Asian                 Native              
## White    "-0.21 [-0.55 0.13]"  "0.07 [-0.22 0.35]" 
## Black    "-0.56 [-0.91 -0.21]" "-0.29 [-0.58 0.01]"
## Hispanic "-0.36 [-0.73 0.00]"  "-0.09 [-0.40 0.22]"
## Asian    NA                    "0.28 [-0.16 0.71]" 
## Native   "0.28 [-0.16 0.71]"   NA                  
## 
## $CI_lower
##           White  Black Hispanic  Asian Native
## White        NA  0.260    0.012 -0.547 -0.216
## Black     0.260     NA   -0.361 -0.910 -0.579
## Hispanic  0.012 -0.361       NA -0.729 -0.401
## Asian    -0.547 -0.910   -0.729     NA -0.164
## Native   -0.216 -0.579   -0.401 -0.164     NA
## 
## $CI_upper
##          White    Black Hispanic    Asian  Native
## White       NA  0.44403  0.29676  0.12858 0.34820
## Black    0.444       NA -0.03467 -0.21297 0.00733
## Hispanic 0.297 -0.03467       NA  0.00155 0.22446
## Asian    0.129 -0.21297  0.00155       NA 0.71500
## Native   0.348  0.00733  0.22446  0.71500      NA
## 
## $se_z
## [1] 1.96
## 
## $se
##           White  Black Hispanic Asian Native
## White        NA 0.0468   0.0726 0.172  0.144
## Black    0.0468     NA   0.0833 0.178  0.150
## Hispanic 0.0726 0.0833       NA 0.186  0.159
## Asian    0.1723 0.1778   0.1863    NA  0.224
## Native   0.1438 0.1496   0.1594 0.224     NA
## 
## $p
##             White    Black Hispanic   Asian Native
## White          NA 5.43e-14   0.0336 0.22485  0.645
## Black    5.43e-14       NA   0.0175 0.00159  0.056
## Hispanic 3.36e-02 1.75e-02       NA 0.05098  0.581
## Asian    2.25e-01 1.59e-03   0.0510      NA  0.219
## Native   6.45e-01 5.60e-02   0.5808 0.21929     NA
## 
## $pairwise_n
##          White Black Hispanic Asian Native
## White       NA  4179     3854  3688   3703
## Black     4179    NA      725   559    574
## Hispanic  3854   725       NA   234    249
## Asian     3688   559      234    NA     83
## Native    3703   574      249    83     NA

Intelligence

#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)

#impute missing data
g_test_data_imp = d[g_tests] %>% miss_impute()

#standardize test variables
for (v in g_tests) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")

#all tests g
fa_g = fa(g_test_data_imp)
fa_g
## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time1       0.82 0.67 0.33   1
## AR_time1       0.81 0.66 0.34   1
## PA             0.71 0.50 0.50   1
## GIT            0.69 0.48 0.52   1
## AFQT           0.85 0.73 0.27   1
## VE_time2       0.82 0.67 0.33   1
## AR_time2       0.82 0.67 0.33   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.76 0.58 0.42   1
## WRAT           0.73 0.53 0.47   1
## PASAT          0.57 0.32 0.68   1
## WLGT           0.49 0.24 0.76   1
## copy_direct    0.47 0.22 0.78   1
## copy_immediate 0.55 0.30 0.70   1
## copy_delayed   0.55 0.30 0.70   1
## CVLT           0.42 0.18 0.82   1
## WCST           0.46 0.21 0.79   1
## GPT_left       0.34 0.12 0.88   1
## GPT_right      0.33 0.11 0.89   1
## 
##                 MR1
## SS loadings    7.94
## Proportion Var 0.42
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  171  with the objective function =  12.6 with Chi Square =  56117
## df of  the model are 152  and the objective function was  3.89 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic n.obs is  4462 with the empirical chi square  13120  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  17324  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.655
## RMSEA index =  0.159  and the 90 % confidence intervals are  0.157 0.161
## BIC =  16047
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.89
#earlier tests only
fa_g_time1 = fa(g_test_data_imp[g_tests_early])
fa_g_time1
## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
##           MR1   h2   u2 com
## VE_time1 0.82 0.67 0.33   1
## AR_time1 0.82 0.68 0.32   1
## PA       0.70 0.49 0.51   1
## GIT      0.73 0.53 0.47   1
## AFQT     0.92 0.85 0.15   1
## 
##                 MR1
## SS loadings    3.22
## Proportion Var 0.64
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  10  with the objective function =  3.14 with Chi Square =  13984
## df of  the model are 5  and the objective function was  0.16 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic n.obs is  4462 with the empirical chi square  175  with prob <  6.6e-36 
## The total n.obs was  4462  with Likelihood Chi Square =  711  with prob <  1.7e-151 
## 
## Tucker Lewis Index of factoring reliability =  0.899
## RMSEA index =  0.178  and the 90 % confidence intervals are  0.167 0.189
## BIC =  669
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.96
## Multiple R square of scores with factors          0.93
## Minimum correlation of possible factor scores     0.85
#later tests only
fa_g_time2 = fa(g_test_data_imp[g_tests_later])
fa_g_time2
## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time2       0.78 0.61 0.39   1
## AR_time2       0.79 0.62 0.38   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.72 0.52 0.48   1
## WRAT           0.71 0.50 0.50   1
## PASAT          0.58 0.33 0.67   1
## WLGT           0.50 0.25 0.75   1
## copy_direct    0.51 0.26 0.74   1
## copy_immediate 0.61 0.37 0.63   1
## copy_delayed   0.61 0.37 0.63   1
## CVLT           0.45 0.20 0.80   1
## WCST           0.47 0.22 0.78   1
## GPT_left       0.38 0.14 0.86   1
## GPT_right      0.37 0.13 0.87   1
## 
##                 MR1
## SS loadings    5.01
## Proportion Var 0.36
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  91  with the objective function =  7.17 with Chi Square =  31949
## df of  the model are 77  and the objective function was  2.8 
## 
## The root mean square of the residuals (RMSR) is  0.11 
## The df corrected root mean square of the residuals is  0.12 
## 
## The harmonic n.obs is  4462 with the empirical chi square  9397  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  12486  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.54
## RMSEA index =  0.19  and the 90 % confidence intervals are  0.187 0.193
## BIC =  11839
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.95
## Multiple R square of scores with factors          0.90
## Minimum correlation of possible factor scores     0.81
#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")

#IQ scale
d$IQ = d$g * 15 + 100

#plot dist
GG_denhist(d, "g", vline = F)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/gdist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
alpha(g_test_data_imp)
## Number of categories should be increased  in order to count frequencies.
## 
## Reliability analysis   
## Call: alpha(x = g_test_data_imp)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.85      0.92    0.95      0.39  12 0.0023   42 9.6     0.34
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.85  0.85  0.86
## Duhachek  0.85  0.85  0.86
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## VE_time1            0.83      0.92    0.94      0.38  11   0.0026 0.026  0.34
## AR_time1            0.83      0.92    0.94      0.38  11   0.0027 0.027  0.33
## PA                  0.84      0.92    0.94      0.38  11   0.0025 0.029  0.33
## GIT                 0.84      0.92    0.94      0.39  11   0.0025 0.029  0.34
## AFQT                0.86      0.92    0.94      0.38  11   0.0023 0.027  0.33
## VE_time2            0.83      0.92    0.94      0.38  11   0.0027 0.026  0.34
## AR_time2            0.83      0.92    0.94      0.38  11   0.0028 0.027  0.33
## WAIS_BD             0.85      0.92    0.94      0.39  11   0.0023 0.030  0.33
## WAIS_GI             0.85      0.92    0.94      0.38  11   0.0023 0.028  0.33
## WRAT                0.84      0.92    0.94      0.38  11   0.0025 0.028  0.34
## PASAT               0.88      0.92    0.94      0.39  12   0.0017 0.031  0.34
## WLGT                0.85      0.92    0.95      0.40  12   0.0024 0.030  0.36
## copy_direct         0.85      0.92    0.95      0.40  12   0.0023 0.030  0.36
## copy_immediate      0.85      0.92    0.94      0.39  12   0.0024 0.029  0.36
## copy_delayed        0.85      0.92    0.94      0.39  12   0.0023 0.029  0.36
## CVLT                0.85      0.92    0.95      0.40  12   0.0023 0.029  0.37
## WCST                0.86      0.92    0.95      0.40  12   0.0023 0.030  0.37
## GPT_left            0.85      0.93    0.94      0.41  12   0.0023 0.027  0.37
## GPT_right           0.85      0.93    0.94      0.41  12   0.0023 0.027  0.37
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop   mean    sd
## VE_time1       4462  0.81  0.79  0.80   0.76 107.14 22.20
## AR_time1       4462  0.83  0.79  0.79   0.78 104.38 21.94
## PA             4462  0.69  0.72  0.71   0.62 104.26 22.56
## GIT            4462  0.70  0.69  0.67   0.64 102.02 18.33
## AFQT           4462  0.80  0.83  0.84   0.80   0.14  0.81
## VE_time2       4462  0.82  0.80  0.81   0.77 116.52 23.04
## AR_time2       4462  0.84  0.81  0.81   0.79 104.56 24.40
## WAIS_BD        4462  0.63  0.71  0.69   0.63  10.52  2.64
## WAIS_GI        4462  0.72  0.75  0.74   0.72  10.07  2.80
## WRAT           4462  0.75  0.73  0.72   0.71  61.16 14.74
## PASAT          4462  0.74  0.61  0.57   0.56 108.72 50.72
## WLGT           4462  0.53  0.53  0.49   0.49  35.12 10.92
## copy_direct    4462  0.44  0.54  0.49   0.43  32.73  3.31
## copy_immediate 4462  0.50  0.61  0.62   0.47  20.11  6.75
## copy_delayed   4462  0.50  0.61  0.62   0.47  20.27  6.33
## CVLT           4462  0.42  0.48  0.43   0.40  11.06  2.33
## WCST           4462  0.44  0.51  0.46   0.44   0.79  0.17
## GPT_left       4462  0.40  0.43  0.39   0.33 -77.38 13.76
## GPT_right      4462  0.39  0.42  0.38   0.33 -73.68 11.84
#method variance check
fa_g_all_methods = fa_all_methods(g_test_data_imp)
## 1 out of 40 - regression_minres
## Saving results from regression_minres
## 2 out of 40 - Thurstone_minres
## Saving results from Thurstone_minres
## 3 out of 40 - tenBerge_minres
## Saving results from tenBerge_minres
## 4 out of 40 - Anderson_minres
## Skipping Anderson_minres due to extraction error
## 5 out of 40 - Bartlett_minres
## Saving results from Bartlett_minres
## 6 out of 40 - regression_ols
## Saving results from regression_ols
## 7 out of 40 - Thurstone_ols
## Saving results from Thurstone_ols
## 8 out of 40 - tenBerge_ols
## Saving results from tenBerge_ols
## 9 out of 40 - Anderson_ols
## Skipping Anderson_ols due to extraction error
## 10 out of 40 - Bartlett_ols
## Saving results from Bartlett_ols
## 11 out of 40 - regression_wls
## Saving results from regression_wls
## 12 out of 40 - Thurstone_wls
## Saving results from Thurstone_wls
## 13 out of 40 - tenBerge_wls
## Saving results from tenBerge_wls
## 14 out of 40 - Anderson_wls
## Skipping Anderson_wls due to extraction error
## 15 out of 40 - Bartlett_wls
## Saving results from Bartlett_wls
## 16 out of 40 - regression_gls
## Saving results from regression_gls
## 17 out of 40 - Thurstone_gls
## Saving results from Thurstone_gls
## 18 out of 40 - tenBerge_gls
## Saving results from tenBerge_gls
## 19 out of 40 - Anderson_gls
## Skipping Anderson_gls due to extraction error
## 20 out of 40 - Bartlett_gls
## Saving results from Bartlett_gls
## 21 out of 40 - regression_pa
## Saving results from regression_pa
## 22 out of 40 - Thurstone_pa
## Saving results from Thurstone_pa
## 23 out of 40 - tenBerge_pa
## Saving results from tenBerge_pa
## 24 out of 40 - Anderson_pa
## Skipping Anderson_pa due to extraction error
## 25 out of 40 - Bartlett_pa
## Saving results from Bartlett_pa
## 26 out of 40 - regression_ml
## Saving results from regression_ml
## 27 out of 40 - Thurstone_ml
## Saving results from Thurstone_ml
## 28 out of 40 - tenBerge_ml
## Saving results from tenBerge_ml
## 29 out of 40 - Anderson_ml
## Skipping Anderson_ml due to extraction error
## 30 out of 40 - Bartlett_ml
## Saving results from Bartlett_ml
## 31 out of 40 - regression_minchi
## Saving results from regression_minchi
## 32 out of 40 - Thurstone_minchi
## Saving results from Thurstone_minchi
## 33 out of 40 - tenBerge_minchi
## Saving results from tenBerge_minchi
## 34 out of 40 - Anderson_minchi
## Skipping Anderson_minchi due to extraction error
## 35 out of 40 - Bartlett_minchi
## Saving results from Bartlett_minchi
## 36 out of 40 - regression_minrank
## Saving results from regression_minrank
## 37 out of 40 - Thurstone_minrank
## Saving results from Thurstone_minrank
## 38 out of 40 - tenBerge_minrank
## Saving results from tenBerge_minrank
## 39 out of 40 - Anderson_minrank
## Skipping Anderson_minrank due to extraction error
## 40 out of 40 - Bartlett_minrank
## Saving results from Bartlett_minrank
fa_g_all_methods$scores %>% GG_heatmap()

GG_save("figs/fa_methods.png")

fa_g_all_methods$scores %>% 
  cor() %>% 
  MAT_half(diag = F) %>% 
  describe2()

Main results

#correlations
d %>% select(fertility, g, religiousness, education, income) %>% 
  cor_matrix(p_val = T)
##               fertility  g          religiousness education  income    
## fertility     "1"        "-0.11***" "0.17***"     "-0.10***" "-0.02"   
## g             "-0.11***" "1"        "-0.18***"    "0.55***"  "0.40***" 
## religiousness "0.17***"  "-0.18***" "1"           "-0.09***" "-0.11***"
## education     "-0.10***" "0.55***"  "-0.09***"    "1"        "0.35***" 
## income        "-0.02"    "0.40***"  "-0.11***"    "0.35***"  "1"
d %>% select(fertility, g, religiousness, education, income) %>% 
  polycor::hetcor()
## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##               fertility       g religiousness education  income
## fertility             1 Pearson       Pearson   Pearson Pearson
## g                -0.108       1       Pearson   Pearson Pearson
## religiousness     0.174  -0.184             1   Pearson Pearson
## education        -0.105   0.545       -0.0907         1 Pearson
## income          -0.0212   0.397        -0.108     0.349       1
## 
## Standard Errors:
##               fertility      g religiousness education
## fertility                                             
## g                0.0149                               
## religiousness    0.0147 0.0146                        
## education         0.015 0.0106         0.015          
## income           0.0151 0.0127        0.0149    0.0133
## 
## n = 4373 
## 
## P-values for Tests of Bivariate Normality:
##               fertility         g religiousness education
## fertility                                                
## g              7.49e-56                                  
## religiousness  5.62e-57  1.14e-29                        
## education      1.5e-290 6.04e-257     1.74e-258          
## income         1.78e-69  4.34e-25      1.17e-27 9.71e-266
#is relationship linear?
GG_scatter(d, "religiousness", "fertility") +
  geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/religiousness_fertility.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#models
#standardize variables of interest
d_z = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize() %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped race because it is class factor
d_z_white = d_z %>% filter(race == "White")
d_z_black = d_z %>% filter(race == "Black")
d_z_bw = bind_rows(d_z_white, d_z_black) %>% mutate(race = fct_drop(race))

main_models = list(
  no_controls = ols(fertility ~ religiousness + g, data = d_z),
  controls = ols(fertility ~ religiousness + g + race + age, data = d_z),
  interact = ols(fertility ~ g * religiousness + race + age, data = d_z),
  g_spline = ols(fertility ~ rcs(g) * religiousness + race + age, data = d_z),
  reli_spline = ols(fertility ~ g * rcs(religiousness) + race + age, data = d_z),
  both_spline = ols(fertility ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
  white_interact = ols(fertility ~ g * religiousness + age, data = d_z_white),
  black_interact = ols(fertility ~ g * religiousness + age, data = d_z_black),
  race_3interact = ols(fertility ~ g * religiousness * race + age, data = d_z_bw)
)

#p values for model comparisons
#test linear interaction
lrtest(main_models[[2]], main_models[[3]])
## 
## Model 1: fertility ~ religiousness + g + race + age
## Model 2: fertility ~ g * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##    9.66988    1.00000    0.00187
#test nonlinear g effect
lrtest(main_models[[3]], main_models[[4]])
## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##      1.939      6.000      0.925
#test nonlinear religion effect
lrtest(main_models[[3]], main_models[[5]])
## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ g * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##      9.362      6.000      0.154
#test nonlinear for both
lrtest(main_models[[3]], main_models[[6]])
## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##     17.366     21.000      0.689
#summarise
main_models[c(1:3, 7:8)] %>% 
  summarize_models(asterisks_only = F)
#visualize
#interaction with controls
ggpredict(main_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (0 = White mean)") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction.png")

#whites
ggpredict(main_models$white_interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (0 = White mean)") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction_whites.png")

Count variable models

#standardize non-fertility variables
d_z_notfert = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize(exclude = "fertility") %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped fertility because it is in the exclude list
## Skipped race because it is class factor
#hurdle models
count_models = list(
  poisson = glm(fertility ~ g * religiousness + race + age, data = d_z_notfert, family = "poisson"),
  zeroinflation = pscl::zeroinfl(fertility ~ g * religiousness + race + age, data = d_z_notfert),
  hurdle = pscl::hurdle(fertility ~ g * religiousness + race + age, data = d_z_notfert)
)

map(count_models, summary)
## $poisson
## 
## Call:
## glm(formula = fertility ~ g * religiousness + race + age, family = "poisson", 
##     data = d_z_notfert)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.5638     0.0126   44.78  < 2e-16 ***
## g                -0.0381     0.0115   -3.32  0.00089 ***
## religiousness     0.1174     0.0113   10.34  < 2e-16 ***
## raceBlack         0.1641     0.0349    4.70  2.6e-06 ***
## raceHispanic      0.2239     0.0493    4.54  5.6e-06 ***
## raceAsian         0.0113     0.1308    0.09  0.93094    
## raceNative        0.0844     0.1052    0.80  0.42214    
## age               0.0329     0.0111    2.97  0.00296 ** 
## g:religiousness   0.0402     0.0107    3.77  0.00016 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5431.5  on 4454  degrees of freedom
## Residual deviance: 5210.5  on 4446  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 14663
## 
## Number of Fisher Scoring iterations: 5
## 
## 
## $zeroinflation
## 
## Call:
## pscl::zeroinfl(formula = fertility ~ g * religiousness + race + age, 
##     data = d_z_notfert)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5594 -0.6607  0.0561  0.5751  6.6944 
## 
## Count model coefficients (poisson with log link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.6266     0.0155   40.31  < 2e-16 ***
## g                -0.0189     0.0142   -1.33   0.1841    
## religiousness     0.0937     0.0138    6.81  9.7e-12 ***
## raceBlack         0.1788     0.0407    4.39  1.1e-05 ***
## raceHispanic      0.2501     0.0569    4.39  1.1e-05 ***
## raceAsian         0.0533     0.1528    0.35   0.7274    
## raceNative        0.0588     0.1161    0.51   0.6128    
## age               0.0393     0.0135    2.90   0.0037 ** 
## g:religiousness   0.0279     0.0129    2.17   0.0303 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -2.8624     0.1915  -14.95   <2e-16 ***
## g                 0.2797     0.1752    1.60    0.110    
## religiousness    -0.3589     0.1526   -2.35    0.019 *  
## raceBlack         0.2719     0.4944    0.55    0.582    
## raceHispanic      0.4616     0.5308    0.87    0.385    
## raceAsian         0.5186     1.0014    0.52    0.605    
## raceNative       -0.6580     1.8853   -0.35    0.727    
## age               0.1094     0.1521    0.72    0.472    
## g:religiousness  -0.0955     0.1358   -0.70    0.482    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 29 
## Log-likelihood: -7.29e+03 on 18 Df
## 
## $hurdle
## 
## Call:
## pscl::hurdle(formula = fertility ~ g * religiousness + race + age, data = d_z_notfert)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5485 -0.6656  0.0543  0.5765  6.6910 
## 
## Count model coefficients (truncated poisson with log link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.6271     0.0155   40.33  < 2e-16 ***
## g                -0.0186     0.0141   -1.33   0.1848    
## religiousness     0.0930     0.0139    6.67  2.5e-11 ***
## raceBlack         0.1827     0.0409    4.46  8.1e-06 ***
## raceHispanic      0.2509     0.0566    4.43  9.4e-06 ***
## raceAsian         0.0500     0.1592    0.31   0.7533    
## raceNative        0.0228     0.1287    0.18   0.8591    
## age               0.0401     0.0134    2.99   0.0028 ** 
## g:religiousness   0.0341     0.0131    2.61   0.0090 ** 
## Zero hurdle model coefficients (binomial with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.3780     0.0425   32.41  < 2e-16 ***
## g                -0.1204     0.0402   -2.99   0.0028 ** 
## religiousness     0.2730     0.0393    6.94  3.9e-12 ***
## raceBlack         0.2196     0.1417    1.55   0.1211    
## raceHispanic      0.2513     0.2056    1.22   0.2218    
## raceAsian        -0.0884     0.4129   -0.21   0.8304    
## raceNative        0.4104     0.4150    0.99   0.3227    
## age               0.0280     0.0384    0.73   0.4659    
## g:religiousness   0.0593     0.0373    1.59   0.1120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 15 
## Log-likelihood: -7.29e+03 on 18 Df
#visualize
p_poisson = ggpredict(count_models$poisson, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_poisson

p_zeroinf = ggpredict(count_models$zeroinflation, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous(NULL) +
  ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_zeroinf

p_count = ggpredict(count_models$hurdle, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous(NULL) +
  ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_count

(p_poisson + theme(legend.position = "none")) + (p_zeroinf  + theme(legend.position = "none")) + p_count

GG_save("figs/count_models.png")

Jensen method

d_jensen_method = bind_cols(
  g_test_data_imp,
  fertility = d$fertility
)

fa_Jensens_method(fa_g, df = d_jensen_method, "fertility") +
  scale_y_continuous("Correlation of test with fertility") +
  scale_x_continuous("Loading on the general factor of intelligence")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/jensen_method.png")
## `geom_smooth()` using formula = 'y ~ x'
#extrapolate to g-loading = 1
d_jensen_method_data = tibble(
  test = g_tests,
  g_loading = fa_g$loadings[, 1],
  r_fertility = map_dbl(g_test_data_imp, ~wtd.cors(d$fertility, .))
)

#linear model
(jensen_fit = ols(r_fertility ~ g_loading, data = d_jensen_method_data))
## Linear Regression Model
## 
## ols(formula = r_fertility ~ g_loading, data = d_jensen_method_data)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      19    LR chi2     14.27    R2       0.528    
## sigma0.0279    d.f.            1    R2 adj   0.500    
## d.f.     17    Pr(> chi2) 0.0002    g        0.034    
## 
## Residuals
## 
##       Min        1Q    Median        3Q       Max 
## -0.046215 -0.021412  0.002139  0.018504  0.051269 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept  0.0380 0.0247  1.54 0.1427  
## g_loading -0.1669 0.0383 -4.36 0.0004
#slope + intercept
coef(jensen_fit)[1] + coef(jensen_fit)[2]
## Intercept 
##    -0.129

Meta

write_sessioninfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] patchwork_1.1.2       pscl_1.5.5.1          ggeffects_1.2.3      
##  [4] lavaan_0.6-17         readxl_1.4.2          mirt_1.39            
##  [7] lattice_0.22-5        rms_6.7-0             kirkegaard_2023-04-30
## [10] psych_2.3.3           assertthat_0.2.1      weights_1.0.4        
## [13] Hmisc_5.1-0           magrittr_2.0.3        lubridate_1.9.2      
## [16] forcats_1.0.0         stringr_1.5.0         dplyr_1.1.2          
## [19] purrr_1.0.1           readr_2.1.4           tidyr_1.3.0          
## [22] tibble_3.2.1          ggplot2_3.4.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] vcd_1.4-12           rstudioapi_0.14      jsonlite_1.8.5      
##   [4] shape_1.4.6          TH.data_1.1-2        jomo_2.7-6          
##   [7] farver_2.1.1         nloptr_2.0.3         rmarkdown_2.22      
##  [10] ragg_1.2.5           vctrs_0.6.3          minqa_1.2.5         
##  [13] base64enc_0.1-3      htmltools_0.5.5      polspline_1.1.22    
##  [16] haven_2.5.2          broom_1.0.5          cellranger_1.1.0    
##  [19] Formula_1.2-5        mitml_0.4-5          dcurver_0.9.2       
##  [22] sass_0.4.6           bslib_0.5.0          htmlwidgets_1.6.2   
##  [25] plyr_1.8.8           sandwich_3.0-2       zoo_1.8-12          
##  [28] cachem_1.0.8         admisc_0.34          lifecycle_1.0.3     
##  [31] iterators_1.0.14     pkgconfig_2.0.3      sjlabelled_1.2.0    
##  [34] Matrix_1.6-3         R6_2.5.1             fastmap_1.1.1       
##  [37] digest_0.6.31        colorspace_2.1-0     textshaping_0.3.6   
##  [40] vegan_2.6-4          labeling_0.4.2       fansi_1.0.4         
##  [43] timechange_0.2.0     gdata_2.19.0         abind_1.4-5         
##  [46] mgcv_1.9-1           compiler_4.3.2       proxy_0.4-27        
##  [49] bit64_4.0.5          withr_2.5.0          htmlTable_2.4.1     
##  [52] backports_1.4.1      carData_3.0-5        highr_0.10          
##  [55] pan_1.6              MASS_7.3-60          quantreg_5.95       
##  [58] GPArotation_2023.3-1 gtools_3.9.4         permute_0.9-7       
##  [61] tools_4.3.2          pbivnorm_0.6.0       lmtest_0.9-40       
##  [64] ranger_0.16.0        foreign_0.8-86       nnet_7.3-19         
##  [67] glue_1.6.2           quadprog_1.5-8       nlme_3.1-163        
##  [70] grid_4.3.2           checkmate_2.2.0      cluster_2.1.6       
##  [73] generics_0.1.3       gtable_0.3.3         tzdb_0.4.0          
##  [76] class_7.3-22         data.table_1.14.8    hms_1.1.3           
##  [79] car_3.1-2            sp_2.1-2             Deriv_4.1.3         
##  [82] utf8_1.2.3           foreach_1.5.2        pillar_1.9.0        
##  [85] vroom_1.6.3          VIM_6.2.2            robustbase_0.99-2   
##  [88] splines_4.3.2        survival_3.5-7       bit_4.0.5           
##  [91] SparseM_1.81         tidyselect_1.2.0     pbapply_1.7-0       
##  [94] knitr_1.43           gridExtra_2.3        xfun_0.39           
##  [97] Rcsdp_0.1.57.5       DEoptimR_1.1-3       stringi_1.7.12      
## [100] yaml_2.3.7           boot_1.3-28          evaluate_0.21       
## [103] codetools_0.2-19     laeken_0.5.3         cli_3.6.1           
## [106] rpart_4.1.23         systemfonts_1.0.4    munsell_0.5.0       
## [109] jquerylib_0.1.4      Rcpp_1.0.10          polycor_0.8-1       
## [112] parallel_4.3.2       MatrixModels_0.5-1   lme4_1.1-33         
## [115] glmnet_4.1-7         mvtnorm_1.2-2        e1071_1.7-14        
## [118] scales_1.2.1         insight_0.19.7       crayon_1.5.2        
## [121] rlang_1.1.1          multcomp_1.4-24      mnormt_2.1.1        
## [124] mice_3.16.0
#save data
write_rds(d, "data/data_out.rds", compress = "xz")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/agw8s/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}