Analyses
Descriptives
#race distribution
d$race %>% table2()
#age summary stats
d$age %>% describe2()
d$fertility %>% describe2()
d %>%
ggplot(aes(fertility)) +
geom_bar(aes(y = after_stat(count)/sum(after_stat(count)))) +
scale_x_continuous("Number of children (self-report at age 38)", breaks = seq(0, 100)) +
scale_y_continuous("Percentage", labels = scales::percent)
## Warning: Removed 7 rows containing non-finite values (`stat_count()`).

GG_save("figs/dist_fertility.png")
## Warning: Removed 7 rows containing non-finite values (`stat_count()`).
Religiousness
Select items, factor analyze score.
#items and nice names
#these were suitable items that found in a search for god, and relig
#Dutton says:
#27; 50; 53; 58; 95; 98; 115; 209; 249; 258; 369; 476; 483; 490; 491
#and, debatably, 106 and 202
#Emil picks
religion_items = c(
priest_healing = 53,
prophets_accurate = 58,
church_almost_every_week = 95,
believe_in_second_life = 115,
very_religious = 206,
belief_in_devil_and_hell = 249,
belief_in_god = 258,
only_one_true_religion = 373,
special_agent_god = 476,
christ_miracles = 483,
read_bible_often = 490,
no_patience_people_true_believers = 491
)
#prevalence (proportion TRUE)
MMPI_questions$prevalence = colMeans(MMPI_items, na.rm = T)
MMPI_questions$missing = miss_by_var(MMPI_items, prop = T)
#our subset
MMPI_questions[religion_items, ]
#copy to main
for (i in seq_along(religion_items)) {
d[[names(religion_items)[i]]] = MMPI_items[[religion_items[i]]]
}
#religion data
religion_data = MMPI_items[religion_items] %>%
set_names(names(religion_items))
#missing
religion_data_miss = miss_by_case(religion_data, reverse = T)
#correlations
religion_data %>% mixedCor()
## Call: mixedCor(data = .)
## prst_ prph_ ch___ bl___ vry_r b____ blf__
## priest_healing 1.00
## prophets_accurate 0.57 1.00
## church_almost_every_week 0.33 0.51 1.00
## believe_in_second_life 0.40 0.53 0.50 1.00
## very_religious 0.39 0.53 0.68 0.44 1.00
## belief_in_devil_and_hell 0.45 0.63 0.52 0.74 0.46 1.00
## belief_in_god 0.38 0.66 0.57 0.82 0.58 0.85 1.00
## only_one_true_religion 0.36 0.59 0.41 0.36 0.52 0.53 0.58
## special_agent_god 0.34 0.45 0.38 0.35 0.55 0.35 0.34
## christ_miracles 0.48 0.56 0.55 0.67 0.50 0.70 0.84
## read_bible_often 0.49 0.69 0.79 0.48 0.76 0.54 0.56
## no_patience_people_true_believers -0.17 -0.29 -0.30 -0.23 -0.27 -0.31 -0.29
## on___ spc__ chrs_ rd_b_
## priest_healing
## prophets_accurate
## church_almost_every_week
## believe_in_second_life
## very_religious
## belief_in_devil_and_hell
## belief_in_god
## only_one_true_religion 1.00
## special_agent_god 0.43 1.00
## christ_miracles 0.45 0.33 1.00
## read_bible_often 0.57 0.54 0.57 1.00
## no_patience_people_true_believers -0.43 -0.09 -0.25 -0.34
## [1] 1.00
#MIRT
religion_fit = mirt(religion_data, model = 1, technical = list(removeEmptyRows = T))
## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders
##
Iteration: 1, Log-Lik: -24018.628, Max-Change: 1.29602
Iteration: 2, Log-Lik: -22252.825, Max-Change: 1.15131
Iteration: 3, Log-Lik: -21852.463, Max-Change: 1.10645
Iteration: 4, Log-Lik: -21769.643, Max-Change: 0.38262
Iteration: 5, Log-Lik: -21734.538, Max-Change: 0.40918
Iteration: 6, Log-Lik: -21716.733, Max-Change: 0.41660
Iteration: 7, Log-Lik: -21705.239, Max-Change: 0.24595
Iteration: 8, Log-Lik: -21699.488, Max-Change: 0.16613
Iteration: 9, Log-Lik: -21696.375, Max-Change: 0.08014
Iteration: 10, Log-Lik: -21695.420, Max-Change: 0.06825
Iteration: 11, Log-Lik: -21694.184, Max-Change: 0.06579
Iteration: 12, Log-Lik: -21693.506, Max-Change: 0.03398
Iteration: 13, Log-Lik: -21693.123, Max-Change: 0.01961
Iteration: 14, Log-Lik: -21692.941, Max-Change: 0.01109
Iteration: 15, Log-Lik: -21692.840, Max-Change: 0.00900
Iteration: 16, Log-Lik: -21692.697, Max-Change: 0.00249
Iteration: 17, Log-Lik: -21692.693, Max-Change: 0.00122
Iteration: 18, Log-Lik: -21692.690, Max-Change: 0.00116
Iteration: 19, Log-Lik: -21692.683, Max-Change: 0.00029
Iteration: 20, Log-Lik: -21692.683, Max-Change: 0.00028
Iteration: 21, Log-Lik: -21692.683, Max-Change: 0.00025
Iteration: 22, Log-Lik: -21692.682, Max-Change: 0.00019
Iteration: 23, Log-Lik: -21692.682, Max-Change: 0.00093
Iteration: 24, Log-Lik: -21692.682, Max-Change: 0.00044
Iteration: 25, Log-Lik: -21692.682, Max-Change: 0.00021
Iteration: 26, Log-Lik: -21692.682, Max-Change: 0.00077
Iteration: 27, Log-Lik: -21692.682, Max-Change: 0.00067
Iteration: 28, Log-Lik: -21692.682, Max-Change: 0.00020
Iteration: 29, Log-Lik: -21692.682, Max-Change: 0.00069
Iteration: 30, Log-Lik: -21692.682, Max-Change: 0.00064
Iteration: 31, Log-Lik: -21692.681, Max-Change: 0.00018
Iteration: 32, Log-Lik: -21692.681, Max-Change: 0.00063
Iteration: 33, Log-Lik: -21692.681, Max-Change: 0.00057
Iteration: 34, Log-Lik: -21692.681, Max-Change: 0.00016
Iteration: 35, Log-Lik: -21692.681, Max-Change: 0.00058
Iteration: 36, Log-Lik: -21692.681, Max-Change: 0.00051
Iteration: 37, Log-Lik: -21692.681, Max-Change: 0.00015
Iteration: 38, Log-Lik: -21692.681, Max-Change: 0.00054
Iteration: 39, Log-Lik: -21692.681, Max-Change: 0.00047
Iteration: 40, Log-Lik: -21692.681, Max-Change: 0.00014
Iteration: 41, Log-Lik: -21692.681, Max-Change: 0.00050
Iteration: 42, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 43, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 44, Log-Lik: -21692.681, Max-Change: 0.00046
Iteration: 45, Log-Lik: -21692.681, Max-Change: 0.00042
Iteration: 46, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 47, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 48, Log-Lik: -21692.681, Max-Change: 0.00039
Iteration: 49, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 50, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 51, Log-Lik: -21692.681, Max-Change: 0.00037
Iteration: 52, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 53, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 54, Log-Lik: -21692.681, Max-Change: 0.00036
Iteration: 55, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 56, Log-Lik: -21692.680, Max-Change: 0.00042
Iteration: 57, Log-Lik: -21692.680, Max-Change: 0.00034
Iteration: 58, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 59, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 60, Log-Lik: -21692.680, Max-Change: 0.00032
Iteration: 61, Log-Lik: -21692.680, Max-Change: 0.00010
Iteration: 62, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 63, Log-Lik: -21692.680, Max-Change: 0.00031
Iteration: 64, Log-Lik: -21692.680, Max-Change: 0.00010
religion_fit
##
## Call:
## mirt(data = religion_data, model = 1, technical = list(removeEmptyRows = T))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 64 EM iterations.
## mirt version: 1.39
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -21693
## Estimated parameters: 24
## AIC = 43433
## BIC = 43587; SABIC = 43511
religion_fit %>% summary()
## F1 h2
## priest_healing 0.599 0.359
## prophets_accurate 0.757 0.573
## church_almost_every_week 0.763 0.582
## believe_in_second_life 0.779 0.607
## very_religious 0.787 0.619
## belief_in_devil_and_hell 0.830 0.688
## belief_in_god 0.954 0.909
## only_one_true_religion 0.649 0.421
## special_agent_god 0.603 0.364
## christ_miracles 0.811 0.658
## read_bible_often 0.913 0.834
## no_patience_people_true_believers -0.360 0.130
##
## SS loadings: 6.74
## Proportion Var: 0.562
##
## Factor correlations:
##
## F1
## F1 1
religion_fit_scores = fscores(religion_fit, full.scores = T, full.scores.SE = T)
d$religiousness = NA
d$religiousness[religion_data_miss != 0] = religion_fit_scores[, 1]
d$religiousness %<>% standardize(focal_group = (d$race == "White"))
#distribution
d$religiousness %>% GG_denhist() +
scale_x_continuous("Religiousness (factor score)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/religiousness_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#psych package (just for verification)
religion_fit_psych = irt.fa(religion_data)

religion_fit_psych$fa
## Factor Analysis using method = minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## priest_healing 0.56 0.31 0.69 1
## prophets_accurate 0.78 0.61 0.39 1
## church_almost_every_week 0.73 0.53 0.47 1
## believe_in_second_life 0.73 0.53 0.47 1
## very_religious 0.73 0.54 0.46 1
## belief_in_devil_and_hell 0.81 0.65 0.35 1
## belief_in_god 0.87 0.76 0.24 1
## only_one_true_religion 0.67 0.44 0.56 1
## special_agent_god 0.53 0.28 0.72 1
## christ_miracles 0.79 0.62 0.38 1
## read_bible_often 0.82 0.68 0.32 1
## no_patience_people_true_believers -0.37 0.14 0.86 1
##
## MR1
## SS loadings 6.10
## Proportion Var 0.51
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 66 with the objective function = 9.62 with Chi Square = 42859
## df of the model are 54 and the objective function was 3.15
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic n.obs is 4462 with the empirical chi square 4638 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 14046 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.6
## RMSEA index = 0.241 and the 90 % confidence intervals are 0.238 0.244
## BIC = 13592
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.89
religion_fit_psych_scores = scoreIrt(religion_fit_psych, religion_data)
d$religiousness2 = religion_fit_psych_scores[, 1]
d$religiousness2 %<>% standardize(focal_group = (d$race == "White"))
#scores match?
cor.test(d$religiousness, d$religiousness2)
##
## Pearson's product-moment correlation
##
## data: d$religiousness and d$religiousness2
## t = 379, df = 4460, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.984 0.986
## sample estimates:
## cor
## 0.985
ggplot(d, aes(religiousness, religiousness2)) +
geom_count() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#reliability
alpha(religion_data, check.keys=TRUE)
## Warning in alpha(religion_data, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
## This is indicated by a negative sign for the variable name.
##
## Reliability analysis
## Call: alpha(x = religion_data, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.79 0.79 0.8 0.24 3.8 0.0044 0.45 0.22 0.2
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.78 0.79 0.8
## Duhachek 0.78 0.79 0.8
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## priest_healing 0.79 0.79 0.80 0.26 3.8
## prophets_accurate 0.76 0.76 0.78 0.23 3.2
## church_almost_every_week 0.77 0.77 0.78 0.23 3.4
## believe_in_second_life 0.77 0.77 0.78 0.23 3.4
## very_religious 0.78 0.77 0.78 0.24 3.4
## belief_in_devil_and_hell 0.76 0.76 0.77 0.23 3.2
## belief_in_god 0.78 0.78 0.78 0.24 3.5
## only_one_true_religion 0.77 0.77 0.78 0.24 3.4
## special_agent_god 0.79 0.79 0.80 0.25 3.8
## christ_miracles 0.77 0.77 0.78 0.23 3.3
## read_bible_often 0.77 0.77 0.78 0.23 3.3
## no_patience_people_true_believers- 0.80 0.79 0.80 0.26 3.9
## alpha se var.r med.r
## priest_healing 0.0045 0.014 0.24
## prophets_accurate 0.0051 0.015 0.19
## church_almost_every_week 0.0048 0.014 0.20
## believe_in_second_life 0.0048 0.013 0.20
## very_religious 0.0048 0.014 0.20
## belief_in_devil_and_hell 0.0051 0.013 0.20
## belief_in_god 0.0047 0.012 0.23
## only_one_true_religion 0.0048 0.016 0.19
## special_agent_god 0.0045 0.014 0.24
## christ_miracles 0.0049 0.013 0.20
## read_bible_often 0.0048 0.013 0.20
## no_patience_people_true_believers- 0.0043 0.014 0.24
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## priest_healing 4419 0.37 0.41 0.31 0.27 0.083 0.28
## prophets_accurate 4292 0.67 0.65 0.61 0.55 0.505 0.50
## church_almost_every_week 4461 0.60 0.59 0.55 0.48 0.250 0.43
## believe_in_second_life 4410 0.60 0.59 0.55 0.48 0.756 0.43
## very_religious 4449 0.56 0.58 0.53 0.46 0.171 0.38
## belief_in_devil_and_hell 4413 0.68 0.66 0.63 0.57 0.630 0.48
## belief_in_god 4426 0.51 0.53 0.48 0.42 0.915 0.28
## only_one_true_religion 4423 0.61 0.58 0.53 0.47 0.374 0.48
## special_agent_god 4442 0.38 0.42 0.32 0.28 0.089 0.28
## christ_miracles 4328 0.61 0.61 0.57 0.50 0.766 0.42
## read_bible_often 4452 0.58 0.60 0.57 0.49 0.126 0.33
## no_patience_people_true_believers- 4429 0.40 0.38 0.27 0.25 0.734 0.44
##
## Non missing response frequency for each item
## 0 1 miss
## priest_healing 0.92 0.08 0.01
## prophets_accurate 0.50 0.50 0.04
## church_almost_every_week 0.75 0.25 0.00
## believe_in_second_life 0.24 0.76 0.01
## very_religious 0.83 0.17 0.00
## belief_in_devil_and_hell 0.37 0.63 0.01
## belief_in_god 0.08 0.92 0.01
## only_one_true_religion 0.63 0.37 0.01
## special_agent_god 0.91 0.09 0.00
## christ_miracles 0.23 0.77 0.03
## read_bible_often 0.87 0.13 0.00
## no_patience_people_true_believers 0.73 0.27 0.01
empirical_rxx(religion_fit_scores)
## F1
## 0.836
marginal_rxx(religion_fit)
## [1] 0.834
plot(religion_fit, type = "rxx")

# save_plot_to_file(
# plot(religion_fit, type = "rxx"),
# filename = "figs/religiousness_reliability.png")
#religiousness by race
GG_group_means(d, "religiousness", "race", type = "boxplot")

SMD_matrix(-d$religiousness, d$race, extended_output = T)
## $d
## White Black Hispanic Asian Native
## White NA 0.352 0.1544 -0.209 0.0663
## Black 0.3522 NA -0.1978 -0.561 -0.2859
## Hispanic 0.1544 -0.198 NA -0.364 -0.0881
## Asian -0.2091 -0.561 -0.3635 NA 0.2755
## Native 0.0663 -0.286 -0.0881 0.275 NA
##
## $d_string
## White Black Hispanic
## White NA "0.35 [0.26 0.44]" "0.15 [0.01 0.30]"
## Black "0.35 [0.26 0.44]" NA "-0.20 [-0.36 -0.03]"
## Hispanic "0.15 [0.01 0.30]" "-0.20 [-0.36 -0.03]" NA
## Asian "-0.21 [-0.55 0.13]" "-0.56 [-0.91 -0.21]" "-0.36 [-0.73 0.00]"
## Native "0.07 [-0.22 0.35]" "-0.29 [-0.58 0.01]" "-0.09 [-0.40 0.22]"
## Asian Native
## White "-0.21 [-0.55 0.13]" "0.07 [-0.22 0.35]"
## Black "-0.56 [-0.91 -0.21]" "-0.29 [-0.58 0.01]"
## Hispanic "-0.36 [-0.73 0.00]" "-0.09 [-0.40 0.22]"
## Asian NA "0.28 [-0.16 0.71]"
## Native "0.28 [-0.16 0.71]" NA
##
## $CI_lower
## White Black Hispanic Asian Native
## White NA 0.260 0.012 -0.547 -0.216
## Black 0.260 NA -0.361 -0.910 -0.579
## Hispanic 0.012 -0.361 NA -0.729 -0.401
## Asian -0.547 -0.910 -0.729 NA -0.164
## Native -0.216 -0.579 -0.401 -0.164 NA
##
## $CI_upper
## White Black Hispanic Asian Native
## White NA 0.44403 0.29676 0.12858 0.34820
## Black 0.444 NA -0.03467 -0.21297 0.00733
## Hispanic 0.297 -0.03467 NA 0.00155 0.22446
## Asian 0.129 -0.21297 0.00155 NA 0.71500
## Native 0.348 0.00733 0.22446 0.71500 NA
##
## $se_z
## [1] 1.96
##
## $se
## White Black Hispanic Asian Native
## White NA 0.0468 0.0726 0.172 0.144
## Black 0.0468 NA 0.0833 0.178 0.150
## Hispanic 0.0726 0.0833 NA 0.186 0.159
## Asian 0.1723 0.1778 0.1863 NA 0.224
## Native 0.1438 0.1496 0.1594 0.224 NA
##
## $p
## White Black Hispanic Asian Native
## White NA 5.43e-14 0.0336 0.22485 0.645
## Black 5.43e-14 NA 0.0175 0.00159 0.056
## Hispanic 3.36e-02 1.75e-02 NA 0.05098 0.581
## Asian 2.25e-01 1.59e-03 0.0510 NA 0.219
## Native 6.45e-01 5.60e-02 0.5808 0.21929 NA
##
## $pairwise_n
## White Black Hispanic Asian Native
## White NA 4179 3854 3688 3703
## Black 4179 NA 725 559 574
## Hispanic 3854 725 NA 234 249
## Asian 3688 559 234 NA 83
## Native 3703 574 249 83 NA
Intelligence
#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)
#impute missing data
g_test_data_imp = d[g_tests] %>% miss_impute()
#standardize test variables
for (v in g_tests) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")
#all tests g
fa_g = fa(g_test_data_imp)
fa_g
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.81 0.66 0.34 1
## PA 0.71 0.50 0.50 1
## GIT 0.69 0.48 0.52 1
## AFQT 0.85 0.73 0.27 1
## VE_time2 0.82 0.67 0.33 1
## AR_time2 0.82 0.67 0.33 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.76 0.58 0.42 1
## WRAT 0.73 0.53 0.47 1
## PASAT 0.57 0.32 0.68 1
## WLGT 0.49 0.24 0.76 1
## copy_direct 0.47 0.22 0.78 1
## copy_immediate 0.55 0.30 0.70 1
## copy_delayed 0.55 0.30 0.70 1
## CVLT 0.42 0.18 0.82 1
## WCST 0.46 0.21 0.79 1
## GPT_left 0.34 0.12 0.88 1
## GPT_right 0.33 0.11 0.89 1
##
## MR1
## SS loadings 7.94
## Proportion Var 0.42
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 171 with the objective function = 12.6 with Chi Square = 56117
## df of the model are 152 and the objective function was 3.89
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic n.obs is 4462 with the empirical chi square 13120 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 17324 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.655
## RMSEA index = 0.159 and the 90 % confidence intervals are 0.157 0.161
## BIC = 16047
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.89
#earlier tests only
fa_g_time1 = fa(g_test_data_imp[g_tests_early])
fa_g_time1
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.82 0.68 0.32 1
## PA 0.70 0.49 0.51 1
## GIT 0.73 0.53 0.47 1
## AFQT 0.92 0.85 0.15 1
##
## MR1
## SS loadings 3.22
## Proportion Var 0.64
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 10 with the objective function = 3.14 with Chi Square = 13984
## df of the model are 5 and the objective function was 0.16
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 4462 with the empirical chi square 175 with prob < 6.6e-36
## The total n.obs was 4462 with Likelihood Chi Square = 711 with prob < 1.7e-151
##
## Tucker Lewis Index of factoring reliability = 0.899
## RMSEA index = 0.178 and the 90 % confidence intervals are 0.167 0.189
## BIC = 669
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.96
## Multiple R square of scores with factors 0.93
## Minimum correlation of possible factor scores 0.85
#later tests only
fa_g_time2 = fa(g_test_data_imp[g_tests_later])
fa_g_time2
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time2 0.78 0.61 0.39 1
## AR_time2 0.79 0.62 0.38 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.72 0.52 0.48 1
## WRAT 0.71 0.50 0.50 1
## PASAT 0.58 0.33 0.67 1
## WLGT 0.50 0.25 0.75 1
## copy_direct 0.51 0.26 0.74 1
## copy_immediate 0.61 0.37 0.63 1
## copy_delayed 0.61 0.37 0.63 1
## CVLT 0.45 0.20 0.80 1
## WCST 0.47 0.22 0.78 1
## GPT_left 0.38 0.14 0.86 1
## GPT_right 0.37 0.13 0.87 1
##
## MR1
## SS loadings 5.01
## Proportion Var 0.36
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 91 with the objective function = 7.17 with Chi Square = 31949
## df of the model are 77 and the objective function was 2.8
##
## The root mean square of the residuals (RMSR) is 0.11
## The df corrected root mean square of the residuals is 0.12
##
## The harmonic n.obs is 4462 with the empirical chi square 9397 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 12486 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.54
## RMSEA index = 0.19 and the 90 % confidence intervals are 0.187 0.193
## BIC = 11839
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.95
## Multiple R square of scores with factors 0.90
## Minimum correlation of possible factor scores 0.81
#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")
#IQ scale
d$IQ = d$g * 15 + 100
#plot dist
GG_denhist(d, "g", vline = F)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/gdist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
alpha(g_test_data_imp)
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = g_test_data_imp)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.92 0.95 0.39 12 0.0023 42 9.6 0.34
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.85 0.85 0.86
## Duhachek 0.85 0.85 0.86
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## VE_time1 0.83 0.92 0.94 0.38 11 0.0026 0.026 0.34
## AR_time1 0.83 0.92 0.94 0.38 11 0.0027 0.027 0.33
## PA 0.84 0.92 0.94 0.38 11 0.0025 0.029 0.33
## GIT 0.84 0.92 0.94 0.39 11 0.0025 0.029 0.34
## AFQT 0.86 0.92 0.94 0.38 11 0.0023 0.027 0.33
## VE_time2 0.83 0.92 0.94 0.38 11 0.0027 0.026 0.34
## AR_time2 0.83 0.92 0.94 0.38 11 0.0028 0.027 0.33
## WAIS_BD 0.85 0.92 0.94 0.39 11 0.0023 0.030 0.33
## WAIS_GI 0.85 0.92 0.94 0.38 11 0.0023 0.028 0.33
## WRAT 0.84 0.92 0.94 0.38 11 0.0025 0.028 0.34
## PASAT 0.88 0.92 0.94 0.39 12 0.0017 0.031 0.34
## WLGT 0.85 0.92 0.95 0.40 12 0.0024 0.030 0.36
## copy_direct 0.85 0.92 0.95 0.40 12 0.0023 0.030 0.36
## copy_immediate 0.85 0.92 0.94 0.39 12 0.0024 0.029 0.36
## copy_delayed 0.85 0.92 0.94 0.39 12 0.0023 0.029 0.36
## CVLT 0.85 0.92 0.95 0.40 12 0.0023 0.029 0.37
## WCST 0.86 0.92 0.95 0.40 12 0.0023 0.030 0.37
## GPT_left 0.85 0.93 0.94 0.41 12 0.0023 0.027 0.37
## GPT_right 0.85 0.93 0.94 0.41 12 0.0023 0.027 0.37
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## VE_time1 4462 0.81 0.79 0.80 0.76 107.14 22.20
## AR_time1 4462 0.83 0.79 0.79 0.78 104.38 21.94
## PA 4462 0.69 0.72 0.71 0.62 104.26 22.56
## GIT 4462 0.70 0.69 0.67 0.64 102.02 18.33
## AFQT 4462 0.80 0.83 0.84 0.80 0.14 0.81
## VE_time2 4462 0.82 0.80 0.81 0.77 116.52 23.04
## AR_time2 4462 0.84 0.81 0.81 0.79 104.56 24.40
## WAIS_BD 4462 0.63 0.71 0.69 0.63 10.52 2.64
## WAIS_GI 4462 0.72 0.75 0.74 0.72 10.07 2.80
## WRAT 4462 0.75 0.73 0.72 0.71 61.16 14.74
## PASAT 4462 0.74 0.61 0.57 0.56 108.72 50.72
## WLGT 4462 0.53 0.53 0.49 0.49 35.12 10.92
## copy_direct 4462 0.44 0.54 0.49 0.43 32.73 3.31
## copy_immediate 4462 0.50 0.61 0.62 0.47 20.11 6.75
## copy_delayed 4462 0.50 0.61 0.62 0.47 20.27 6.33
## CVLT 4462 0.42 0.48 0.43 0.40 11.06 2.33
## WCST 4462 0.44 0.51 0.46 0.44 0.79 0.17
## GPT_left 4462 0.40 0.43 0.39 0.33 -77.38 13.76
## GPT_right 4462 0.39 0.42 0.38 0.33 -73.68 11.84
#method variance check
fa_g_all_methods = fa_all_methods(g_test_data_imp)
## 1 out of 40 - regression_minres
## Saving results from regression_minres
## 2 out of 40 - Thurstone_minres
## Saving results from Thurstone_minres
## 3 out of 40 - tenBerge_minres
## Saving results from tenBerge_minres
## 4 out of 40 - Anderson_minres
## Skipping Anderson_minres due to extraction error
## 5 out of 40 - Bartlett_minres
## Saving results from Bartlett_minres
## 6 out of 40 - regression_ols
## Saving results from regression_ols
## 7 out of 40 - Thurstone_ols
## Saving results from Thurstone_ols
## 8 out of 40 - tenBerge_ols
## Saving results from tenBerge_ols
## 9 out of 40 - Anderson_ols
## Skipping Anderson_ols due to extraction error
## 10 out of 40 - Bartlett_ols
## Saving results from Bartlett_ols
## 11 out of 40 - regression_wls
## Saving results from regression_wls
## 12 out of 40 - Thurstone_wls
## Saving results from Thurstone_wls
## 13 out of 40 - tenBerge_wls
## Saving results from tenBerge_wls
## 14 out of 40 - Anderson_wls
## Skipping Anderson_wls due to extraction error
## 15 out of 40 - Bartlett_wls
## Saving results from Bartlett_wls
## 16 out of 40 - regression_gls
## Saving results from regression_gls
## 17 out of 40 - Thurstone_gls
## Saving results from Thurstone_gls
## 18 out of 40 - tenBerge_gls
## Saving results from tenBerge_gls
## 19 out of 40 - Anderson_gls
## Skipping Anderson_gls due to extraction error
## 20 out of 40 - Bartlett_gls
## Saving results from Bartlett_gls
## 21 out of 40 - regression_pa
## Saving results from regression_pa
## 22 out of 40 - Thurstone_pa
## Saving results from Thurstone_pa
## 23 out of 40 - tenBerge_pa
## Saving results from tenBerge_pa
## 24 out of 40 - Anderson_pa
## Skipping Anderson_pa due to extraction error
## 25 out of 40 - Bartlett_pa
## Saving results from Bartlett_pa
## 26 out of 40 - regression_ml
## Saving results from regression_ml
## 27 out of 40 - Thurstone_ml
## Saving results from Thurstone_ml
## 28 out of 40 - tenBerge_ml
## Saving results from tenBerge_ml
## 29 out of 40 - Anderson_ml
## Skipping Anderson_ml due to extraction error
## 30 out of 40 - Bartlett_ml
## Saving results from Bartlett_ml
## 31 out of 40 - regression_minchi
## Saving results from regression_minchi
## 32 out of 40 - Thurstone_minchi
## Saving results from Thurstone_minchi
## 33 out of 40 - tenBerge_minchi
## Saving results from tenBerge_minchi
## 34 out of 40 - Anderson_minchi
## Skipping Anderson_minchi due to extraction error
## 35 out of 40 - Bartlett_minchi
## Saving results from Bartlett_minchi
## 36 out of 40 - regression_minrank
## Saving results from regression_minrank
## 37 out of 40 - Thurstone_minrank
## Saving results from Thurstone_minrank
## 38 out of 40 - tenBerge_minrank
## Saving results from tenBerge_minrank
## 39 out of 40 - Anderson_minrank
## Skipping Anderson_minrank due to extraction error
## 40 out of 40 - Bartlett_minrank
## Saving results from Bartlett_minrank
fa_g_all_methods$scores %>% GG_heatmap()

GG_save("figs/fa_methods.png")
fa_g_all_methods$scores %>%
cor() %>%
MAT_half(diag = F) %>%
describe2()
Main results
#correlations
d %>% select(fertility, g, religiousness, education, income) %>%
cor_matrix(p_val = T)
## fertility g religiousness education income
## fertility "1" "-0.11***" "0.17***" "-0.10***" "-0.02"
## g "-0.11***" "1" "-0.18***" "0.55***" "0.40***"
## religiousness "0.17***" "-0.18***" "1" "-0.09***" "-0.11***"
## education "-0.10***" "0.55***" "-0.09***" "1" "0.35***"
## income "-0.02" "0.40***" "-0.11***" "0.35***" "1"
d %>% select(fertility, g, religiousness, education, income) %>%
polycor::hetcor()
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## fertility g religiousness education income
## fertility 1 Pearson Pearson Pearson Pearson
## g -0.108 1 Pearson Pearson Pearson
## religiousness 0.174 -0.184 1 Pearson Pearson
## education -0.105 0.545 -0.0907 1 Pearson
## income -0.0212 0.397 -0.108 0.349 1
##
## Standard Errors:
## fertility g religiousness education
## fertility
## g 0.0149
## religiousness 0.0147 0.0146
## education 0.015 0.0106 0.015
## income 0.0151 0.0127 0.0149 0.0133
##
## n = 4373
##
## P-values for Tests of Bivariate Normality:
## fertility g religiousness education
## fertility
## g 7.49e-56
## religiousness 5.62e-57 1.14e-29
## education 1.5e-290 6.04e-257 1.74e-258
## income 1.78e-69 4.34e-25 1.17e-27 9.71e-266
#is relationship linear?
GG_scatter(d, "religiousness", "fertility") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/religiousness_fertility.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#models
#standardize variables of interest
d_z = d %>% select(fertility, fertility_confirmed, religiousness, g, age, race) %>% df_standardize() %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped race because it is class factor
d_z_white = d_z %>% filter(race == "White")
d_z_black = d_z %>% filter(race == "Black")
d_z_bw = bind_rows(d_z_white, d_z_black) %>% mutate(race = fct_drop(race))
main_models = list(
no_controls = ols(fertility ~ religiousness + g, data = d_z),
controls = ols(fertility ~ religiousness + g + race + age, data = d_z),
interact = ols(fertility ~ g * religiousness + race + age, data = d_z),
g_spline = ols(fertility ~ rcs(g) * religiousness + race + age, data = d_z),
reli_spline = ols(fertility ~ g * rcs(religiousness) + race + age, data = d_z),
both_spline = ols(fertility ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
white_interact = ols(fertility ~ g * religiousness + age, data = d_z_white),
black_interact = ols(fertility ~ g * religiousness + age, data = d_z_black),
race_3interact = ols(fertility ~ g * religiousness * race + age, data = d_z_bw),
interact_noage = ols(fertility ~ g * religiousness + race, data = d_z)
)
#p values for model comparisons
#test linear interaction
lrtest(main_models[[2]], main_models[[3]])
##
## Model 1: fertility ~ religiousness + g + race + age
## Model 2: fertility ~ g * religiousness + race + age
##
## L.R. Chisq d.f. P
## 9.66988 1.00000 0.00187
#test nonlinear g effect
lrtest(main_models[[3]], main_models[[4]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * religiousness + race + age
##
## L.R. Chisq d.f. P
## 1.939 6.000 0.925
#test nonlinear religion effect
lrtest(main_models[[3]], main_models[[5]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ g * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 9.362 6.000 0.154
#test nonlinear for both
lrtest(main_models[[3]], main_models[[6]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 17.366 21.000 0.689
#summarise
main_models[c(1:3, 7:8)] %>%
summarize_models(asterisks_only = F)
#visualize
#interaction with controls
ggpredict(main_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (0 = White mean)") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction.png")
#whites
ggpredict(main_models$white_interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (0 = White mean)") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction_whites.png")
#using unstandardized units
unstd_interact_model = ols(fertility ~ IQ * religiousness + age + race, data = d)
unstd_interact_model
## Frequencies of Missing Values Due to Each Variable
## fertility IQ religiousness age race
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ IQ * religiousness + age + race, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 222.56 R2 0.049
## sigma1.3250 d.f. 8 R2 adj 0.047
## d.f. 4446 Pr(> chi2) 0.0000 g 0.337
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.60305 -0.97301 0.07147 0.83996 9.53461
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.2467 0.3223 3.87 0.0001
## IQ -0.0041 0.0014 -2.99 0.0028
## religiousness -0.1918 0.1320 -1.45 0.1463
## age 0.0242 0.0079 3.06 0.0022
## race=Black 0.3353 0.0672 4.99 <0.0001
## race=Hispanic 0.4560 0.0979 4.66 <0.0001
## race=Asian 0.0271 0.2284 0.12 0.9055
## race=Native 0.1552 0.1928 0.80 0.4210
## IQ * religiousness 0.0040 0.0013 3.11 0.0019
#visualize
#interaction with controls
ggpredict(unstd_interact_model, terms = c("IQ [70:130]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (100 = White mean)") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Count variable models
#standardize non-fertility variables
d_z_notfert = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize(exclude = "fertility") %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped fertility because it is in the exclude list
## Skipped race because it is class factor
#hurdle models
count_models = list(
poisson = glm(fertility ~ g * religiousness + race + age, data = d_z_notfert, family = "poisson"),
zeroinflation = pscl::zeroinfl(fertility ~ g * religiousness + race + age, data = d_z_notfert),
hurdle = pscl::hurdle(fertility ~ g * religiousness + race + age, data = d_z_notfert)
)
map(count_models, summary)
## $poisson
##
## Call:
## glm(formula = fertility ~ g * religiousness + race + age, family = "poisson",
## data = d_z_notfert)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5638 0.0126 44.78 < 2e-16 ***
## g -0.0381 0.0115 -3.32 0.00089 ***
## religiousness 0.1174 0.0113 10.34 < 2e-16 ***
## raceBlack 0.1641 0.0349 4.70 2.6e-06 ***
## raceHispanic 0.2239 0.0493 4.54 5.6e-06 ***
## raceAsian 0.0113 0.1308 0.09 0.93094
## raceNative 0.0844 0.1052 0.80 0.42214
## age 0.0329 0.0111 2.97 0.00296 **
## g:religiousness 0.0402 0.0107 3.77 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 5431.5 on 4454 degrees of freedom
## Residual deviance: 5210.5 on 4446 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 14663
##
## Number of Fisher Scoring iterations: 5
##
##
## $zeroinflation
##
## Call:
## pscl::zeroinfl(formula = fertility ~ g * religiousness + race + age,
## data = d_z_notfert)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.5594 -0.6607 0.0561 0.5751 6.6944
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6266 0.0155 40.31 < 2e-16 ***
## g -0.0189 0.0142 -1.33 0.1841
## religiousness 0.0937 0.0138 6.81 9.7e-12 ***
## raceBlack 0.1788 0.0407 4.39 1.1e-05 ***
## raceHispanic 0.2501 0.0569 4.39 1.1e-05 ***
## raceAsian 0.0533 0.1528 0.35 0.7274
## raceNative 0.0588 0.1161 0.51 0.6128
## age 0.0393 0.0135 2.90 0.0037 **
## g:religiousness 0.0279 0.0129 2.17 0.0303 *
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8624 0.1915 -14.95 <2e-16 ***
## g 0.2797 0.1752 1.60 0.110
## religiousness -0.3589 0.1526 -2.35 0.019 *
## raceBlack 0.2719 0.4944 0.55 0.582
## raceHispanic 0.4616 0.5308 0.87 0.385
## raceAsian 0.5186 1.0014 0.52 0.605
## raceNative -0.6580 1.8853 -0.35 0.727
## age 0.1094 0.1521 0.72 0.472
## g:religiousness -0.0955 0.1358 -0.70 0.482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 29
## Log-likelihood: -7.29e+03 on 18 Df
##
## $hurdle
##
## Call:
## pscl::hurdle(formula = fertility ~ g * religiousness + race + age, data = d_z_notfert)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.5485 -0.6656 0.0543 0.5765 6.6910
##
## Count model coefficients (truncated poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6271 0.0155 40.33 < 2e-16 ***
## g -0.0186 0.0141 -1.33 0.1848
## religiousness 0.0930 0.0139 6.67 2.5e-11 ***
## raceBlack 0.1827 0.0409 4.46 8.1e-06 ***
## raceHispanic 0.2509 0.0566 4.43 9.4e-06 ***
## raceAsian 0.0500 0.1592 0.31 0.7533
## raceNative 0.0228 0.1287 0.18 0.8591
## age 0.0401 0.0134 2.99 0.0028 **
## g:religiousness 0.0341 0.0131 2.61 0.0090 **
## Zero hurdle model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3780 0.0425 32.41 < 2e-16 ***
## g -0.1204 0.0402 -2.99 0.0028 **
## religiousness 0.2730 0.0393 6.94 3.9e-12 ***
## raceBlack 0.2196 0.1417 1.55 0.1211
## raceHispanic 0.2513 0.2056 1.22 0.2218
## raceAsian -0.0884 0.4129 -0.21 0.8304
## raceNative 0.4104 0.4150 0.99 0.3227
## age 0.0280 0.0384 0.73 0.4659
## g:religiousness 0.0593 0.0373 1.59 0.1120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 15
## Log-likelihood: -7.29e+03 on 18 Df
#visualize
p_poisson = ggpredict(count_models$poisson, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_poisson

p_zeroinf = ggpredict(count_models$zeroinflation, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous(NULL) +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_zeroinf

p_count = ggpredict(count_models$hurdle, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous(NULL) +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_count

(p_poisson + theme(legend.position = "none")) + (p_zeroinf + theme(legend.position = "none")) + p_count

GG_save("figs/count_models.png")
Alternative fertility variable
alt_fert_models = list(
no_controls = ols(fertility_confirmed ~ religiousness + g, data = d_z),
controls = ols(fertility_confirmed ~ religiousness + g + race + age, data = d_z),
interact = ols(fertility_confirmed ~ g * religiousness + race + age, data = d_z),
g_spline = ols(fertility_confirmed ~ rcs(g) * religiousness + race + age, data = d_z),
reli_spline = ols(fertility_confirmed ~ g * rcs(religiousness) + race + age, data = d_z),
both_spline = ols(fertility_confirmed ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
white_interact = ols(fertility_confirmed ~ g * religiousness + age, data = d_z_white),
black_interact = ols(fertility_confirmed ~ g * religiousness + age, data = d_z_black),
race_3interact = ols(fertility_confirmed ~ g * religiousness * race + age, data = d_z_bw),
interact_noage = ols(fertility_confirmed ~ g * religiousness + race, data = d_z)
)
#p values for model comparisons
#test linear interaction
lrtest(alt_fert_models[[2]], alt_fert_models[[3]])
##
## Model 1: fertility_confirmed ~ religiousness + g + race + age
## Model 2: fertility_confirmed ~ g * religiousness + race + age
##
## L.R. Chisq d.f. P
## 8.09640 1.00000 0.00444
#test nonlinear g effect
lrtest(alt_fert_models[[3]], alt_fert_models[[4]])
##
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ rcs(g) * religiousness + race + age
##
## L.R. Chisq d.f. P
## 3.493 6.000 0.745
#test nonlinear religion effect
lrtest(alt_fert_models[[3]], alt_fert_models[[5]])
##
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ g * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 7.246 6.000 0.299
#test nonlinear for both
lrtest(alt_fert_models[[3]], alt_fert_models[[6]])
##
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ rcs(g) * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 18.990 21.000 0.586
#summarise
alt_fert_models[c(1:3, 7:8)] %>%
summarize_models(asterisks_only = F)
#visualize
#interaction with controls
ggpredict(alt_fert_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (0 = White mean)") +
scale_y_continuous("Number of children at age 38 (confirmed)") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Full model outputs
main_models
## $no_controls
## Frequencies of Missing Values Due to Each Variable
## fertility religiousness g
## 7 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ religiousness + g, data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 163.77 R2 0.036
## sigma0.9820 d.f. 2 R2 adj 0.036
## d.f. 4452 Pr(> chi2) 0.0000 g 0.215
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.8522 -0.7269 0.0395 0.6593 7.2161
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0142 0.0150 -0.95 0.3423
## religiousness 0.1593 0.0150 10.64 <0.0001
## g -0.0731 0.0140 -5.23 <0.0001
##
##
## $controls
## Frequencies of Missing Values Due to Each Variable
## fertility religiousness g race age
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ religiousness + g + race + age, data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 212.89 R2 0.047
## sigma0.9772 d.f. 7 R2 adj 0.045
## d.f. 4447 Pr(> chi2) 0.0000 g 0.244
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.93412 -0.72300 0.05617 0.60558 6.97791
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0524 0.0162 -3.24 0.0012
## religiousness 0.1549 0.0149 10.38 <0.0001
## g -0.0426 0.0151 -2.81 0.0049
## race=Black 0.2359 0.0494 4.77 <0.0001
## race=Hispanic 0.3365 0.0722 4.66 <0.0001
## race=Asian 0.0142 0.1684 0.08 0.9328
## race=Native 0.1026 0.1421 0.72 0.4703
## age 0.0440 0.0147 2.99 0.0028
##
##
## $interact
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race age
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ g * religiousness + race + age, data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 222.56 R2 0.049
## sigma0.9762 d.f. 8 R2 adj 0.047
## d.f. 4446 Pr(> chi2) 0.0000 g 0.248
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.91778 -0.71686 0.05265 0.61883 7.02457
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0453 0.0163 -2.78 0.0055
## g -0.0433 0.0151 -2.86 0.0043
## religiousness 0.1525 0.0149 10.21 <0.0001
## race=Black 0.2470 0.0495 4.99 <0.0001
## race=Hispanic 0.3360 0.0721 4.66 <0.0001
## race=Asian 0.0200 0.1683 0.12 0.9055
## race=Native 0.1143 0.1421 0.80 0.4210
## age 0.0450 0.0147 3.06 0.0022
## g * religiousness 0.0437 0.0141 3.11 0.0019
##
##
## $g_spline
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race age
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ rcs(g) * religiousness + race + age,
## data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 224.50 R2 0.049
## sigma0.9767 d.f. 14 R2 adj 0.046
## d.f. 4440 Pr(> chi2) 0.0000 g 0.249
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.90776 -0.71787 0.05121 0.61450 6.97383
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0238 0.1184 -0.20 0.8407
## g -0.0321 0.0707 -0.45 0.6499
## g' -0.0613 0.3078 -0.20 0.8421
## g'' 0.2891 1.5033 0.19 0.8475
## g''' -0.4297 2.6092 -0.16 0.8692
## religiousness 0.3081 0.1348 2.29 0.0223
## race=Black 0.2467 0.0501 4.92 <0.0001
## race=Hispanic 0.3340 0.0722 4.62 <0.0001
## race=Asian 0.0243 0.1684 0.14 0.8853
## race=Native 0.1112 0.1422 0.78 0.4343
## age 0.0458 0.0148 3.09 0.0020
## g * religiousness 0.1285 0.0815 1.58 0.1149
## g' * religiousness -0.4275 0.3398 -1.26 0.2084
## g'' * religiousness 1.9416 1.5934 1.22 0.2231
## g''' * religiousness -2.7658 2.6287 -1.05 0.2928
##
##
## $reli_spline
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race age
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ g * rcs(religiousness) + race + age,
## data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 231.93 R2 0.051
## sigma0.9758 d.f. 14 R2 adj 0.048
## d.f. 4440 Pr(> chi2) 0.0000 g 0.255
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.89874 -0.72248 0.04202 0.60719 7.04277
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.2308 0.1188 -1.94 0.0521
## g -0.2353 0.1145 -2.06 0.0399
## religiousness 0.0536 0.0789 0.68 0.4971
## religiousness' 0.4724 0.2820 1.68 0.0940
## religiousness'' -3.7145 2.4873 -1.49 0.1354
## religiousness''' 6.3026 5.2787 1.19 0.2326
## race=Black 0.2478 0.0495 5.00 <0.0001
## race=Hispanic 0.3354 0.0722 4.65 <0.0001
## race=Asian 0.0243 0.1683 0.14 0.8854
## race=Native 0.1173 0.1421 0.83 0.4092
## age 0.0449 0.0147 3.05 0.0023
## g * religiousness -0.0788 0.0765 -1.03 0.3033
## g * religiousness' 0.4075 0.2698 1.51 0.1309
## g * religiousness'' -2.1347 2.3325 -0.92 0.3601
## g * religiousness''' 2.0103 4.8920 0.41 0.6811
##
##
## $both_spline
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race age
## 7 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ rcs(g) * rcs(religiousness) + race +
## age, data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 239.93 R2 0.052
## sigma0.9766 d.f. 29 R2 adj 0.046
## d.f. 4425 Pr(> chi2) 0.0000 g 0.259
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.91741 -0.72974 0.03742 0.61557 6.99849
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.2654 1.2414 1.02 0.3081
## g 0.6766 0.7895 0.86 0.3915
## g' -3.3279 3.0077 -1.11 0.2686
## g'' 12.9625 13.4439 0.96 0.3350
## g''' -16.7864 21.3243 -0.79 0.4312
## religiousness 1.2076 0.9432 1.28 0.2005
## religiousness' -2.9048 2.7836 -1.04 0.2967
## religiousness'' 21.2222 21.3402 0.99 0.3200
## religiousness''' -38.8329 42.2663 -0.92 0.3583
## race=Black 0.2483 0.0501 4.95 <0.0001
## race=Hispanic 0.3345 0.0723 4.63 <0.0001
## race=Asian 0.0357 0.1687 0.21 0.8324
## race=Native 0.1144 0.1424 0.80 0.4219
## age 0.0463 0.0148 3.12 0.0018
## g * religiousness 0.5951 0.6166 0.97 0.3345
## g' * religiousness -2.7538 2.2194 -1.24 0.2148
## g'' * religiousness 11.3009 9.5016 1.19 0.2344
## g''' * religiousness -14.9732 14.3342 -1.04 0.2963
## g * religiousness' -1.7027 1.7532 -0.97 0.3315
## g' * religiousness' 7.2019 6.8291 1.05 0.2917
## g'' * religiousness' -27.7604 31.0647 -0.89 0.3716
## g''' * religiousness' 37.9095 50.3064 0.75 0.4511
## g * religiousness'' 13.9435 13.0212 1.07 0.2843
## g' * religiousness'' -51.1805 54.1905 -0.94 0.3450
## g'' * religiousness'' 202.2231 257.4386 0.79 0.4322
## g''' * religiousness'' -314.5354 435.9250 -0.72 0.4706
## g * religiousness''' -27.4379 25.3689 -1.08 0.2795
## g' * religiousness''' 92.8035 109.3950 0.85 0.3963
## g'' * religiousness''' -384.7428 531.1933 -0.72 0.4689
## g''' * religiousness''' 659.4486 918.9303 0.72 0.4730
##
##
## $white_interact
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness age
## 5 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ g * religiousness + age, data = d_z_white)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3649 LR chi2 125.88 R2 0.034
## sigma0.9342 d.f. 4 R2 adj 0.033
## d.f. 3644 Pr(> chi2) 0.0000 g 0.189
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.74834 -0.65230 0.09718 0.63652 6.79781
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0427 0.0157 -2.72 0.0065
## g -0.0301 0.0158 -1.91 0.0564
## religiousness 0.1368 0.0158 8.67 <0.0001
## age 0.0341 0.0157 2.17 0.0297
## g * religiousness 0.0640 0.0157 4.08 <0.0001
##
##
## $black_interact
## Linear Regression Model
##
## ols(formula = fertility ~ g * religiousness + age, data = d_z_black)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 525 LR chi2 20.30 R2 0.038
## sigma1.1791 d.f. 4 R2 adj 0.031
## d.f. 520 Pr(> chi2) 0.0004 g 0.264
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.05459 -0.87755 -0.06329 0.65450 6.81865
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1053 0.0947 1.11 0.2666
## g -0.0902 0.0627 -1.44 0.1505
## religiousness 0.2000 0.0938 2.13 0.0335
## age 0.1021 0.0497 2.05 0.0404
## g * religiousness -0.0120 0.0654 -0.18 0.8551
##
##
## $race_3interact
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race age
## 5 0 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ g * religiousness * race + age, data = d_z_bw)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4174 LR chi2 200.23 R2 0.047
## sigma0.9683 d.f. 8 R2 adj 0.045
## d.f. 4165 Pr(> chi2) 0.0000 g 0.232
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.99307 -0.67747 0.08242 0.63613 6.92578
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0427 0.0162 -2.63 0.0086
## g -0.0310 0.0163 -1.90 0.0573
## religiousness 0.1364 0.0163 8.34 <0.0001
## race=Black 0.1468 0.0794 1.85 0.0646
## age 0.0435 0.0151 2.88 0.0041
## g * religiousness 0.0642 0.0163 3.95 <0.0001
## g * race=Black -0.0587 0.0540 -1.09 0.2768
## religiousness * race=Black 0.0631 0.0788 0.80 0.4235
## g * religiousness * race=Black -0.0787 0.0561 -1.40 0.1608
##
##
## $interact_noage
## Frequencies of Missing Values Due to Each Variable
## fertility g religiousness race
## 7 0 0 0
##
## Linear Regression Model
##
## ols(formula = fertility ~ g * religiousness + race, data = d_z)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 213.18 R2 0.047
## sigma0.9771 d.f. 7 R2 adj 0.045
## d.f. 4447 Pr(> chi2) 0.0000 g 0.242
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.85311 -0.70404 0.05942 0.61251 7.10581
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0452 0.0163 -2.76 0.0057
## g -0.0392 0.0151 -2.60 0.0094
## religiousness 0.1537 0.0149 10.29 <0.0001
## race=Black 0.2506 0.0496 5.06 <0.0001
## race=Hispanic 0.3404 0.0722 4.72 <0.0001
## race=Asian 0.0163 0.1684 0.10 0.9231
## race=Native 0.1051 0.1422 0.74 0.4597
## g * religiousness 0.0428 0.0141 3.04 0.0024
Jensen method
d_jensen_method = bind_cols(
g_test_data_imp,
fertility = d$fertility
)
fa_Jensens_method(fa_g, df = d_jensen_method, "fertility") +
scale_y_continuous("Correlation of test with fertility") +
scale_x_continuous("Loading on the general factor of intelligence")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/jensen_method.png")
## `geom_smooth()` using formula = 'y ~ x'
#extrapolate to g-loading = 1
d_jensen_method_data = tibble(
test = g_tests,
g_loading = fa_g$loadings[, 1],
r_fertility = map_dbl(g_test_data_imp, ~wtd.cors(d$fertility, .))
)
#linear model
(jensen_fit = ols(r_fertility ~ g_loading, data = d_jensen_method_data))
## Linear Regression Model
##
## ols(formula = r_fertility ~ g_loading, data = d_jensen_method_data)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 19 LR chi2 14.27 R2 0.528
## sigma0.0279 d.f. 1 R2 adj 0.500
## d.f. 17 Pr(> chi2) 0.0002 g 0.034
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.046215 -0.021412 0.002139 0.018504 0.051269
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0380 0.0247 1.54 0.1427
## g_loading -0.1669 0.0383 -4.36 0.0004
#slope + intercept
coef(jensen_fit)[1] + coef(jensen_fit)[2]
## Intercept
## -0.129