Religiousness
Select items, factor analyze score.
#items and nice names
#these were suitable items that found in a search for god, and relig
#Dutton says:
#27; 50; 53; 58; 95; 98; 115; 209; 249; 258; 369; 476; 483; 490; 491
#and, debatably, 106 and 202
#Emil picks
religion_items = c(
priest_healing = 53,
prophets_accurate = 58,
church_almost_every_week = 95,
believe_in_second_life = 115,
very_religious = 206,
belief_in_devil_and_hell = 249,
belief_in_god = 258,
only_one_true_religion = 373,
special_agent_god = 476,
christ_miracles = 483,
read_bible_often = 490,
no_patience_people_true_believers = 491
)
#prevalence (proportion TRUE)
MMPI_questions$prevalence = colMeans(MMPI_items, na.rm = T)
MMPI_questions$missing = miss_by_var(MMPI_items, prop = T)
#our subset
MMPI_questions[religion_items, ]
#copy to main
for (i in seq_along(religion_items)) {
d[[names(religion_items)[i]]] = MMPI_items[[religion_items[i]]]
}
#religion data
religion_data = MMPI_items[religion_items] %>%
set_names(names(religion_items))
#missing
religion_data_miss = miss_by_case(religion_data, reverse = T)
#correlations
religion_data %>% mixedCor()
## Call: mixedCor(data = .)
## prst_ prph_ ch___ bl___ vry_r b____ blf__
## priest_healing 1.00
## prophets_accurate 0.57 1.00
## church_almost_every_week 0.33 0.51 1.00
## believe_in_second_life 0.40 0.53 0.50 1.00
## very_religious 0.39 0.53 0.68 0.44 1.00
## belief_in_devil_and_hell 0.45 0.63 0.52 0.74 0.46 1.00
## belief_in_god 0.38 0.66 0.57 0.82 0.58 0.85 1.00
## only_one_true_religion 0.36 0.59 0.41 0.36 0.52 0.53 0.58
## special_agent_god 0.34 0.45 0.38 0.35 0.55 0.35 0.34
## christ_miracles 0.48 0.56 0.55 0.67 0.50 0.70 0.84
## read_bible_often 0.49 0.69 0.79 0.48 0.76 0.54 0.56
## no_patience_people_true_believers -0.17 -0.29 -0.30 -0.23 -0.27 -0.31 -0.29
## on___ spc__ chrs_ rd_b_
## priest_healing
## prophets_accurate
## church_almost_every_week
## believe_in_second_life
## very_religious
## belief_in_devil_and_hell
## belief_in_god
## only_one_true_religion 1.00
## special_agent_god 0.43 1.00
## christ_miracles 0.45 0.33 1.00
## read_bible_often 0.57 0.54 0.57 1.00
## no_patience_people_true_believers -0.43 -0.09 -0.25 -0.34
## [1] 1.00
#MIRT
religion_fit = mirt(religion_data, model = 1, technical = list(removeEmptyRows = T))
## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders
##
Iteration: 1, Log-Lik: -24018.628, Max-Change: 1.29602
Iteration: 2, Log-Lik: -22252.825, Max-Change: 1.15131
Iteration: 3, Log-Lik: -21852.463, Max-Change: 1.10645
Iteration: 4, Log-Lik: -21769.643, Max-Change: 0.38262
Iteration: 5, Log-Lik: -21734.538, Max-Change: 0.40918
Iteration: 6, Log-Lik: -21716.733, Max-Change: 0.41660
Iteration: 7, Log-Lik: -21705.239, Max-Change: 0.24595
Iteration: 8, Log-Lik: -21699.488, Max-Change: 0.16613
Iteration: 9, Log-Lik: -21696.375, Max-Change: 0.08014
Iteration: 10, Log-Lik: -21695.420, Max-Change: 0.06825
Iteration: 11, Log-Lik: -21694.184, Max-Change: 0.06579
Iteration: 12, Log-Lik: -21693.506, Max-Change: 0.03398
Iteration: 13, Log-Lik: -21693.123, Max-Change: 0.01961
Iteration: 14, Log-Lik: -21692.941, Max-Change: 0.01109
Iteration: 15, Log-Lik: -21692.840, Max-Change: 0.00900
Iteration: 16, Log-Lik: -21692.697, Max-Change: 0.00249
Iteration: 17, Log-Lik: -21692.693, Max-Change: 0.00122
Iteration: 18, Log-Lik: -21692.690, Max-Change: 0.00116
Iteration: 19, Log-Lik: -21692.683, Max-Change: 0.00029
Iteration: 20, Log-Lik: -21692.683, Max-Change: 0.00028
Iteration: 21, Log-Lik: -21692.683, Max-Change: 0.00025
Iteration: 22, Log-Lik: -21692.682, Max-Change: 0.00019
Iteration: 23, Log-Lik: -21692.682, Max-Change: 0.00093
Iteration: 24, Log-Lik: -21692.682, Max-Change: 0.00044
Iteration: 25, Log-Lik: -21692.682, Max-Change: 0.00021
Iteration: 26, Log-Lik: -21692.682, Max-Change: 0.00077
Iteration: 27, Log-Lik: -21692.682, Max-Change: 0.00067
Iteration: 28, Log-Lik: -21692.682, Max-Change: 0.00020
Iteration: 29, Log-Lik: -21692.682, Max-Change: 0.00069
Iteration: 30, Log-Lik: -21692.682, Max-Change: 0.00064
Iteration: 31, Log-Lik: -21692.681, Max-Change: 0.00018
Iteration: 32, Log-Lik: -21692.681, Max-Change: 0.00063
Iteration: 33, Log-Lik: -21692.681, Max-Change: 0.00057
Iteration: 34, Log-Lik: -21692.681, Max-Change: 0.00016
Iteration: 35, Log-Lik: -21692.681, Max-Change: 0.00058
Iteration: 36, Log-Lik: -21692.681, Max-Change: 0.00051
Iteration: 37, Log-Lik: -21692.681, Max-Change: 0.00015
Iteration: 38, Log-Lik: -21692.681, Max-Change: 0.00054
Iteration: 39, Log-Lik: -21692.681, Max-Change: 0.00047
Iteration: 40, Log-Lik: -21692.681, Max-Change: 0.00014
Iteration: 41, Log-Lik: -21692.681, Max-Change: 0.00050
Iteration: 42, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 43, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 44, Log-Lik: -21692.681, Max-Change: 0.00046
Iteration: 45, Log-Lik: -21692.681, Max-Change: 0.00042
Iteration: 46, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 47, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 48, Log-Lik: -21692.681, Max-Change: 0.00039
Iteration: 49, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 50, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 51, Log-Lik: -21692.681, Max-Change: 0.00037
Iteration: 52, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 53, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 54, Log-Lik: -21692.681, Max-Change: 0.00036
Iteration: 55, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 56, Log-Lik: -21692.680, Max-Change: 0.00042
Iteration: 57, Log-Lik: -21692.680, Max-Change: 0.00034
Iteration: 58, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 59, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 60, Log-Lik: -21692.680, Max-Change: 0.00032
Iteration: 61, Log-Lik: -21692.680, Max-Change: 0.00010
Iteration: 62, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 63, Log-Lik: -21692.680, Max-Change: 0.00031
Iteration: 64, Log-Lik: -21692.680, Max-Change: 0.00010
religion_fit
##
## Call:
## mirt(data = religion_data, model = 1, technical = list(removeEmptyRows = T))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 64 EM iterations.
## mirt version: 1.39
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -21693
## Estimated parameters: 24
## AIC = 43433
## BIC = 43587; SABIC = 43511
religion_fit %>% summary()
## F1 h2
## priest_healing 0.599 0.359
## prophets_accurate 0.757 0.573
## church_almost_every_week 0.763 0.582
## believe_in_second_life 0.779 0.607
## very_religious 0.787 0.619
## belief_in_devil_and_hell 0.830 0.688
## belief_in_god 0.954 0.909
## only_one_true_religion 0.649 0.421
## special_agent_god 0.603 0.364
## christ_miracles 0.811 0.658
## read_bible_often 0.913 0.834
## no_patience_people_true_believers -0.360 0.130
##
## SS loadings: 6.74
## Proportion Var: 0.562
##
## Factor correlations:
##
## F1
## F1 1
religion_fit_scores = fscores(religion_fit, full.scores = T, full.scores.SE = T)
d$religiousness = NA
d$religiousness[religion_data_miss != 0] = religion_fit_scores[, 1]
d$religiousness %<>% standardize(focal_group = (d$race == "White"))
#distribution
d$religiousness %>% GG_denhist() +
scale_x_continuous("Religiousness (factor score)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/religiousness_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#psych package (just for verification)
religion_fit_psych = irt.fa(religion_data)

religion_fit_psych$fa
## Factor Analysis using method = minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## priest_healing 0.56 0.31 0.69 1
## prophets_accurate 0.78 0.61 0.39 1
## church_almost_every_week 0.73 0.53 0.47 1
## believe_in_second_life 0.73 0.53 0.47 1
## very_religious 0.73 0.54 0.46 1
## belief_in_devil_and_hell 0.81 0.65 0.35 1
## belief_in_god 0.87 0.76 0.24 1
## only_one_true_religion 0.67 0.44 0.56 1
## special_agent_god 0.53 0.28 0.72 1
## christ_miracles 0.79 0.62 0.38 1
## read_bible_often 0.82 0.68 0.32 1
## no_patience_people_true_believers -0.37 0.14 0.86 1
##
## MR1
## SS loadings 6.10
## Proportion Var 0.51
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 66 with the objective function = 9.62 with Chi Square = 42859
## df of the model are 54 and the objective function was 3.15
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic n.obs is 4462 with the empirical chi square 4638 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 14046 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.6
## RMSEA index = 0.241 and the 90 % confidence intervals are 0.238 0.244
## BIC = 13592
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.89
religion_fit_psych_scores = scoreIrt(religion_fit_psych, religion_data)
d$religiousness2 = religion_fit_psych_scores[, 1]
d$religiousness2 %<>% standardize(focal_group = (d$race == "White"))
#scores match?
cor.test(d$religiousness, d$religiousness2)
##
## Pearson's product-moment correlation
##
## data: d$religiousness and d$religiousness2
## t = 379, df = 4460, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.984 0.986
## sample estimates:
## cor
## 0.985
ggplot(d, aes(religiousness, religiousness2)) +
geom_count() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#reliability
alpha(religion_data, check.keys=TRUE)
## Warning in alpha(religion_data, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
## This is indicated by a negative sign for the variable name.
##
## Reliability analysis
## Call: alpha(x = religion_data, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.79 0.79 0.8 0.24 3.8 0.0044 0.45 0.22 0.2
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.78 0.79 0.8
## Duhachek 0.78 0.79 0.8
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## priest_healing 0.79 0.79 0.80 0.26 3.8
## prophets_accurate 0.76 0.76 0.78 0.23 3.2
## church_almost_every_week 0.77 0.77 0.78 0.23 3.4
## believe_in_second_life 0.77 0.77 0.78 0.23 3.4
## very_religious 0.78 0.77 0.78 0.24 3.4
## belief_in_devil_and_hell 0.76 0.76 0.77 0.23 3.2
## belief_in_god 0.78 0.78 0.78 0.24 3.5
## only_one_true_religion 0.77 0.77 0.78 0.24 3.4
## special_agent_god 0.79 0.79 0.80 0.25 3.8
## christ_miracles 0.77 0.77 0.78 0.23 3.3
## read_bible_often 0.77 0.77 0.78 0.23 3.3
## no_patience_people_true_believers- 0.80 0.79 0.80 0.26 3.9
## alpha se var.r med.r
## priest_healing 0.0045 0.014 0.24
## prophets_accurate 0.0051 0.015 0.19
## church_almost_every_week 0.0048 0.014 0.20
## believe_in_second_life 0.0048 0.013 0.20
## very_religious 0.0048 0.014 0.20
## belief_in_devil_and_hell 0.0051 0.013 0.20
## belief_in_god 0.0047 0.012 0.23
## only_one_true_religion 0.0048 0.016 0.19
## special_agent_god 0.0045 0.014 0.24
## christ_miracles 0.0049 0.013 0.20
## read_bible_often 0.0048 0.013 0.20
## no_patience_people_true_believers- 0.0043 0.014 0.24
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## priest_healing 4419 0.37 0.41 0.31 0.27 0.083 0.28
## prophets_accurate 4292 0.67 0.65 0.61 0.55 0.505 0.50
## church_almost_every_week 4461 0.60 0.59 0.55 0.48 0.250 0.43
## believe_in_second_life 4410 0.60 0.59 0.55 0.48 0.756 0.43
## very_religious 4449 0.56 0.58 0.53 0.46 0.171 0.38
## belief_in_devil_and_hell 4413 0.68 0.66 0.63 0.57 0.630 0.48
## belief_in_god 4426 0.51 0.53 0.48 0.42 0.915 0.28
## only_one_true_religion 4423 0.61 0.58 0.53 0.47 0.374 0.48
## special_agent_god 4442 0.38 0.42 0.32 0.28 0.089 0.28
## christ_miracles 4328 0.61 0.61 0.57 0.50 0.766 0.42
## read_bible_often 4452 0.58 0.60 0.57 0.49 0.126 0.33
## no_patience_people_true_believers- 4429 0.40 0.38 0.27 0.25 0.734 0.44
##
## Non missing response frequency for each item
## 0 1 miss
## priest_healing 0.92 0.08 0.01
## prophets_accurate 0.50 0.50 0.04
## church_almost_every_week 0.75 0.25 0.00
## believe_in_second_life 0.24 0.76 0.01
## very_religious 0.83 0.17 0.00
## belief_in_devil_and_hell 0.37 0.63 0.01
## belief_in_god 0.08 0.92 0.01
## only_one_true_religion 0.63 0.37 0.01
## special_agent_god 0.91 0.09 0.00
## christ_miracles 0.23 0.77 0.03
## read_bible_often 0.87 0.13 0.00
## no_patience_people_true_believers 0.73 0.27 0.01
empirical_rxx(religion_fit_scores)
## F1
## 0.836
marginal_rxx(religion_fit)
## [1] 0.834
plot(religion_fit, type = "rxx")

# save_plot_to_file(
# plot(religion_fit, type = "rxx"),
# filename = "figs/religiousness_reliability.png")
#religiousness by race
GG_group_means(d, "religiousness", "race", type = "boxplot")

SMD_matrix(-d$religiousness, d$race, extended_output = T)
## $d
## White Black Hispanic Asian Native
## White NA 0.352 0.1544 -0.209 0.0663
## Black 0.3522 NA -0.1978 -0.561 -0.2859
## Hispanic 0.1544 -0.198 NA -0.364 -0.0881
## Asian -0.2091 -0.561 -0.3635 NA 0.2755
## Native 0.0663 -0.286 -0.0881 0.275 NA
##
## $d_string
## White Black Hispanic
## White NA "0.35 [0.26 0.44]" "0.15 [0.01 0.30]"
## Black "0.35 [0.26 0.44]" NA "-0.20 [-0.36 -0.03]"
## Hispanic "0.15 [0.01 0.30]" "-0.20 [-0.36 -0.03]" NA
## Asian "-0.21 [-0.55 0.13]" "-0.56 [-0.91 -0.21]" "-0.36 [-0.73 0.00]"
## Native "0.07 [-0.22 0.35]" "-0.29 [-0.58 0.01]" "-0.09 [-0.40 0.22]"
## Asian Native
## White "-0.21 [-0.55 0.13]" "0.07 [-0.22 0.35]"
## Black "-0.56 [-0.91 -0.21]" "-0.29 [-0.58 0.01]"
## Hispanic "-0.36 [-0.73 0.00]" "-0.09 [-0.40 0.22]"
## Asian NA "0.28 [-0.16 0.71]"
## Native "0.28 [-0.16 0.71]" NA
##
## $CI_lower
## White Black Hispanic Asian Native
## White NA 0.260 0.012 -0.547 -0.216
## Black 0.260 NA -0.361 -0.910 -0.579
## Hispanic 0.012 -0.361 NA -0.729 -0.401
## Asian -0.547 -0.910 -0.729 NA -0.164
## Native -0.216 -0.579 -0.401 -0.164 NA
##
## $CI_upper
## White Black Hispanic Asian Native
## White NA 0.44403 0.29676 0.12858 0.34820
## Black 0.444 NA -0.03467 -0.21297 0.00733
## Hispanic 0.297 -0.03467 NA 0.00155 0.22446
## Asian 0.129 -0.21297 0.00155 NA 0.71500
## Native 0.348 0.00733 0.22446 0.71500 NA
##
## $se_z
## [1] 1.96
##
## $se
## White Black Hispanic Asian Native
## White NA 0.0468 0.0726 0.172 0.144
## Black 0.0468 NA 0.0833 0.178 0.150
## Hispanic 0.0726 0.0833 NA 0.186 0.159
## Asian 0.1723 0.1778 0.1863 NA 0.224
## Native 0.1438 0.1496 0.1594 0.224 NA
##
## $p
## White Black Hispanic Asian Native
## White NA 5.43e-14 0.0336 0.22485 0.645
## Black 5.43e-14 NA 0.0175 0.00159 0.056
## Hispanic 3.36e-02 1.75e-02 NA 0.05098 0.581
## Asian 2.25e-01 1.59e-03 0.0510 NA 0.219
## Native 6.45e-01 5.60e-02 0.5808 0.21929 NA
##
## $pairwise_n
## White Black Hispanic Asian Native
## White NA 4179 3854 3688 3703
## Black 4179 NA 725 559 574
## Hispanic 3854 725 NA 234 249
## Asian 3688 559 234 NA 83
## Native 3703 574 249 83 NA
Intelligence
#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)
#impute missing data
g_test_data_imp = d[g_tests] %>% miss_impute()
#standardize test variables
for (v in g_tests) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")
#all tests g
fa_g = fa(g_test_data_imp)
fa_g
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.81 0.66 0.34 1
## PA 0.71 0.50 0.50 1
## GIT 0.69 0.48 0.52 1
## AFQT 0.85 0.73 0.27 1
## VE_time2 0.82 0.67 0.33 1
## AR_time2 0.82 0.67 0.33 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.76 0.58 0.42 1
## WRAT 0.73 0.53 0.47 1
## PASAT 0.57 0.32 0.68 1
## WLGT 0.49 0.24 0.76 1
## copy_direct 0.47 0.22 0.78 1
## copy_immediate 0.55 0.30 0.70 1
## copy_delayed 0.55 0.30 0.70 1
## CVLT 0.42 0.18 0.82 1
## WCST 0.46 0.21 0.79 1
## GPT_left 0.34 0.12 0.88 1
## GPT_right 0.33 0.11 0.89 1
##
## MR1
## SS loadings 7.94
## Proportion Var 0.42
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 171 with the objective function = 12.6 with Chi Square = 56117
## df of the model are 152 and the objective function was 3.89
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic n.obs is 4462 with the empirical chi square 13120 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 17324 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.655
## RMSEA index = 0.159 and the 90 % confidence intervals are 0.157 0.161
## BIC = 16047
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.89
#earlier tests only
fa_g_time1 = fa(g_test_data_imp[g_tests_early])
fa_g_time1
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.82 0.68 0.32 1
## PA 0.70 0.49 0.51 1
## GIT 0.73 0.53 0.47 1
## AFQT 0.92 0.85 0.15 1
##
## MR1
## SS loadings 3.22
## Proportion Var 0.64
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 10 with the objective function = 3.14 with Chi Square = 13984
## df of the model are 5 and the objective function was 0.16
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 4462 with the empirical chi square 175 with prob < 6.6e-36
## The total n.obs was 4462 with Likelihood Chi Square = 711 with prob < 1.7e-151
##
## Tucker Lewis Index of factoring reliability = 0.899
## RMSEA index = 0.178 and the 90 % confidence intervals are 0.167 0.189
## BIC = 669
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.96
## Multiple R square of scores with factors 0.93
## Minimum correlation of possible factor scores 0.85
#later tests only
fa_g_time2 = fa(g_test_data_imp[g_tests_later])
fa_g_time2
## Factor Analysis using method = minres
## Call: fa(r = g_test_data_imp[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time2 0.78 0.61 0.39 1
## AR_time2 0.79 0.62 0.38 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.72 0.52 0.48 1
## WRAT 0.71 0.50 0.50 1
## PASAT 0.58 0.33 0.67 1
## WLGT 0.50 0.25 0.75 1
## copy_direct 0.51 0.26 0.74 1
## copy_immediate 0.61 0.37 0.63 1
## copy_delayed 0.61 0.37 0.63 1
## CVLT 0.45 0.20 0.80 1
## WCST 0.47 0.22 0.78 1
## GPT_left 0.38 0.14 0.86 1
## GPT_right 0.37 0.13 0.87 1
##
## MR1
## SS loadings 5.01
## Proportion Var 0.36
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 91 with the objective function = 7.17 with Chi Square = 31949
## df of the model are 77 and the objective function was 2.8
##
## The root mean square of the residuals (RMSR) is 0.11
## The df corrected root mean square of the residuals is 0.12
##
## The harmonic n.obs is 4462 with the empirical chi square 9397 with prob < 0
## The total n.obs was 4462 with Likelihood Chi Square = 12486 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.54
## RMSEA index = 0.19 and the 90 % confidence intervals are 0.187 0.193
## BIC = 11839
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.95
## Multiple R square of scores with factors 0.90
## Minimum correlation of possible factor scores 0.81
#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")
#IQ scale
d$IQ = d$g * 15 + 100
#plot dist
GG_denhist(d, "g", vline = F)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/gdist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
alpha(g_test_data_imp)
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = g_test_data_imp)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.92 0.95 0.39 12 0.0023 42 9.6 0.34
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.85 0.85 0.86
## Duhachek 0.85 0.85 0.86
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## VE_time1 0.83 0.92 0.94 0.38 11 0.0026 0.026 0.34
## AR_time1 0.83 0.92 0.94 0.38 11 0.0027 0.027 0.33
## PA 0.84 0.92 0.94 0.38 11 0.0025 0.029 0.33
## GIT 0.84 0.92 0.94 0.39 11 0.0025 0.029 0.34
## AFQT 0.86 0.92 0.94 0.38 11 0.0023 0.027 0.33
## VE_time2 0.83 0.92 0.94 0.38 11 0.0027 0.026 0.34
## AR_time2 0.83 0.92 0.94 0.38 11 0.0028 0.027 0.33
## WAIS_BD 0.85 0.92 0.94 0.39 11 0.0023 0.030 0.33
## WAIS_GI 0.85 0.92 0.94 0.38 11 0.0023 0.028 0.33
## WRAT 0.84 0.92 0.94 0.38 11 0.0025 0.028 0.34
## PASAT 0.88 0.92 0.94 0.39 12 0.0017 0.031 0.34
## WLGT 0.85 0.92 0.95 0.40 12 0.0024 0.030 0.36
## copy_direct 0.85 0.92 0.95 0.40 12 0.0023 0.030 0.36
## copy_immediate 0.85 0.92 0.94 0.39 12 0.0024 0.029 0.36
## copy_delayed 0.85 0.92 0.94 0.39 12 0.0023 0.029 0.36
## CVLT 0.85 0.92 0.95 0.40 12 0.0023 0.029 0.37
## WCST 0.86 0.92 0.95 0.40 12 0.0023 0.030 0.37
## GPT_left 0.85 0.93 0.94 0.41 12 0.0023 0.027 0.37
## GPT_right 0.85 0.93 0.94 0.41 12 0.0023 0.027 0.37
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## VE_time1 4462 0.81 0.79 0.80 0.76 107.14 22.20
## AR_time1 4462 0.83 0.79 0.79 0.78 104.38 21.94
## PA 4462 0.69 0.72 0.71 0.62 104.26 22.56
## GIT 4462 0.70 0.69 0.67 0.64 102.02 18.33
## AFQT 4462 0.80 0.83 0.84 0.80 0.14 0.81
## VE_time2 4462 0.82 0.80 0.81 0.77 116.52 23.04
## AR_time2 4462 0.84 0.81 0.81 0.79 104.56 24.40
## WAIS_BD 4462 0.63 0.71 0.69 0.63 10.52 2.64
## WAIS_GI 4462 0.72 0.75 0.74 0.72 10.07 2.80
## WRAT 4462 0.75 0.73 0.72 0.71 61.16 14.74
## PASAT 4462 0.74 0.61 0.57 0.56 108.72 50.72
## WLGT 4462 0.53 0.53 0.49 0.49 35.12 10.92
## copy_direct 4462 0.44 0.54 0.49 0.43 32.73 3.31
## copy_immediate 4462 0.50 0.61 0.62 0.47 20.11 6.75
## copy_delayed 4462 0.50 0.61 0.62 0.47 20.27 6.33
## CVLT 4462 0.42 0.48 0.43 0.40 11.06 2.33
## WCST 4462 0.44 0.51 0.46 0.44 0.79 0.17
## GPT_left 4462 0.40 0.43 0.39 0.33 -77.38 13.76
## GPT_right 4462 0.39 0.42 0.38 0.33 -73.68 11.84
#method variance check
fa_g_all_methods = fa_all_methods(g_test_data_imp)
## 1 out of 40 - regression_minres
## Saving results from regression_minres
## 2 out of 40 - Thurstone_minres
## Saving results from Thurstone_minres
## 3 out of 40 - tenBerge_minres
## Saving results from tenBerge_minres
## 4 out of 40 - Anderson_minres
## Skipping Anderson_minres due to extraction error
## 5 out of 40 - Bartlett_minres
## Saving results from Bartlett_minres
## 6 out of 40 - regression_ols
## Saving results from regression_ols
## 7 out of 40 - Thurstone_ols
## Saving results from Thurstone_ols
## 8 out of 40 - tenBerge_ols
## Saving results from tenBerge_ols
## 9 out of 40 - Anderson_ols
## Skipping Anderson_ols due to extraction error
## 10 out of 40 - Bartlett_ols
## Saving results from Bartlett_ols
## 11 out of 40 - regression_wls
## Saving results from regression_wls
## 12 out of 40 - Thurstone_wls
## Saving results from Thurstone_wls
## 13 out of 40 - tenBerge_wls
## Saving results from tenBerge_wls
## 14 out of 40 - Anderson_wls
## Skipping Anderson_wls due to extraction error
## 15 out of 40 - Bartlett_wls
## Saving results from Bartlett_wls
## 16 out of 40 - regression_gls
## Saving results from regression_gls
## 17 out of 40 - Thurstone_gls
## Saving results from Thurstone_gls
## 18 out of 40 - tenBerge_gls
## Saving results from tenBerge_gls
## 19 out of 40 - Anderson_gls
## Skipping Anderson_gls due to extraction error
## 20 out of 40 - Bartlett_gls
## Saving results from Bartlett_gls
## 21 out of 40 - regression_pa
## Saving results from regression_pa
## 22 out of 40 - Thurstone_pa
## Saving results from Thurstone_pa
## 23 out of 40 - tenBerge_pa
## Saving results from tenBerge_pa
## 24 out of 40 - Anderson_pa
## Skipping Anderson_pa due to extraction error
## 25 out of 40 - Bartlett_pa
## Saving results from Bartlett_pa
## 26 out of 40 - regression_ml
## Saving results from regression_ml
## 27 out of 40 - Thurstone_ml
## Saving results from Thurstone_ml
## 28 out of 40 - tenBerge_ml
## Saving results from tenBerge_ml
## 29 out of 40 - Anderson_ml
## Skipping Anderson_ml due to extraction error
## 30 out of 40 - Bartlett_ml
## Saving results from Bartlett_ml
## 31 out of 40 - regression_minchi
## Saving results from regression_minchi
## 32 out of 40 - Thurstone_minchi
## Saving results from Thurstone_minchi
## 33 out of 40 - tenBerge_minchi
## Saving results from tenBerge_minchi
## 34 out of 40 - Anderson_minchi
## Skipping Anderson_minchi due to extraction error
## 35 out of 40 - Bartlett_minchi
## Saving results from Bartlett_minchi
## 36 out of 40 - regression_minrank
## Saving results from regression_minrank
## 37 out of 40 - Thurstone_minrank
## Saving results from Thurstone_minrank
## 38 out of 40 - tenBerge_minrank
## Saving results from tenBerge_minrank
## 39 out of 40 - Anderson_minrank
## Skipping Anderson_minrank due to extraction error
## 40 out of 40 - Bartlett_minrank
## Saving results from Bartlett_minrank
fa_g_all_methods$scores %>% GG_heatmap()

GG_save("figs/fa_methods.png")
fa_g_all_methods$scores %>%
cor() %>%
MAT_half(diag = F) %>%
describe2()
Main results
#correlations
d %>% select(fertility, g, religiousness, education, income) %>%
cor_matrix(p_val = T)
## fertility g religiousness education income
## fertility "1" "-0.11***" "0.17***" "-0.10***" "-0.02"
## g "-0.11***" "1" "-0.18***" "0.55***" "0.40***"
## religiousness "0.17***" "-0.18***" "1" "-0.09***" "-0.11***"
## education "-0.10***" "0.55***" "-0.09***" "1" "0.35***"
## income "-0.02" "0.40***" "-0.11***" "0.35***" "1"
d %>% select(fertility, g, religiousness, education, income) %>%
polycor::hetcor()
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## fertility g religiousness education income
## fertility 1 Pearson Pearson Pearson Pearson
## g -0.108 1 Pearson Pearson Pearson
## religiousness 0.174 -0.184 1 Pearson Pearson
## education -0.105 0.545 -0.0907 1 Pearson
## income -0.0212 0.397 -0.108 0.349 1
##
## Standard Errors:
## fertility g religiousness education
## fertility
## g 0.0149
## religiousness 0.0147 0.0146
## education 0.015 0.0106 0.015
## income 0.0151 0.0127 0.0149 0.0133
##
## n = 4373
##
## P-values for Tests of Bivariate Normality:
## fertility g religiousness education
## fertility
## g 7.49e-56
## religiousness 5.62e-57 1.14e-29
## education 1.5e-290 6.04e-257 1.74e-258
## income 1.78e-69 4.34e-25 1.17e-27 9.71e-266
#is relationship linear?
GG_scatter(d, "religiousness", "fertility") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/religiousness_fertility.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#models
#standardize variables of interest
d_z = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize() %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped race because it is class factor
d_z_white = d_z %>% filter(race == "White")
d_z_black = d_z %>% filter(race == "Black")
d_z_bw = bind_rows(d_z_white, d_z_black) %>% mutate(race = fct_drop(race))
main_models = list(
no_controls = ols(fertility ~ religiousness + g, data = d_z),
controls = ols(fertility ~ religiousness + g + race + age, data = d_z),
interact = ols(fertility ~ g * religiousness + race + age, data = d_z),
g_spline = ols(fertility ~ rcs(g) * religiousness + race + age, data = d_z),
reli_spline = ols(fertility ~ g * rcs(religiousness) + race + age, data = d_z),
both_spline = ols(fertility ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
white_interact = ols(fertility ~ g * religiousness + age, data = d_z_white),
black_interact = ols(fertility ~ g * religiousness + age, data = d_z_black),
race_3interact = ols(fertility ~ g * religiousness * race + age, data = d_z_bw)
)
#p values for model comparisons
#test linear interaction
lrtest(main_models[[2]], main_models[[3]])
##
## Model 1: fertility ~ religiousness + g + race + age
## Model 2: fertility ~ g * religiousness + race + age
##
## L.R. Chisq d.f. P
## 9.66988 1.00000 0.00187
#test nonlinear g effect
lrtest(main_models[[3]], main_models[[4]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * religiousness + race + age
##
## L.R. Chisq d.f. P
## 1.939 6.000 0.925
#test nonlinear religion effect
lrtest(main_models[[3]], main_models[[5]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ g * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 9.362 6.000 0.154
#test nonlinear for both
lrtest(main_models[[3]], main_models[[6]])
##
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * rcs(religiousness) + race + age
##
## L.R. Chisq d.f. P
## 17.366 21.000 0.689
#summarise
main_models[c(1:3, 7:8)] %>%
summarize_models(asterisks_only = F)
#visualize
#interaction with controls
ggpredict(main_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (0 = White mean)") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction.png")
#whites
ggpredict(main_models$white_interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("General intelligence (0 = White mean)") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction_whites.png")
Count variable models
#standardize non-fertility variables
d_z_notfert = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize(exclude = "fertility") %>% mutate(g = standardize(g, focal_group = (race == "White")))
## Skipped fertility because it is in the exclude list
## Skipped race because it is class factor
#hurdle models
count_models = list(
poisson = glm(fertility ~ g * religiousness + race + age, data = d_z_notfert, family = "poisson"),
zeroinflation = pscl::zeroinfl(fertility ~ g * religiousness + race + age, data = d_z_notfert),
hurdle = pscl::hurdle(fertility ~ g * religiousness + race + age, data = d_z_notfert)
)
map(count_models, summary)
## $poisson
##
## Call:
## glm(formula = fertility ~ g * religiousness + race + age, family = "poisson",
## data = d_z_notfert)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5638 0.0126 44.78 < 2e-16 ***
## g -0.0381 0.0115 -3.32 0.00089 ***
## religiousness 0.1174 0.0113 10.34 < 2e-16 ***
## raceBlack 0.1641 0.0349 4.70 2.6e-06 ***
## raceHispanic 0.2239 0.0493 4.54 5.6e-06 ***
## raceAsian 0.0113 0.1308 0.09 0.93094
## raceNative 0.0844 0.1052 0.80 0.42214
## age 0.0329 0.0111 2.97 0.00296 **
## g:religiousness 0.0402 0.0107 3.77 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 5431.5 on 4454 degrees of freedom
## Residual deviance: 5210.5 on 4446 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 14663
##
## Number of Fisher Scoring iterations: 5
##
##
## $zeroinflation
##
## Call:
## pscl::zeroinfl(formula = fertility ~ g * religiousness + race + age,
## data = d_z_notfert)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.5594 -0.6607 0.0561 0.5751 6.6944
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6266 0.0155 40.31 < 2e-16 ***
## g -0.0189 0.0142 -1.33 0.1841
## religiousness 0.0937 0.0138 6.81 9.7e-12 ***
## raceBlack 0.1788 0.0407 4.39 1.1e-05 ***
## raceHispanic 0.2501 0.0569 4.39 1.1e-05 ***
## raceAsian 0.0533 0.1528 0.35 0.7274
## raceNative 0.0588 0.1161 0.51 0.6128
## age 0.0393 0.0135 2.90 0.0037 **
## g:religiousness 0.0279 0.0129 2.17 0.0303 *
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8624 0.1915 -14.95 <2e-16 ***
## g 0.2797 0.1752 1.60 0.110
## religiousness -0.3589 0.1526 -2.35 0.019 *
## raceBlack 0.2719 0.4944 0.55 0.582
## raceHispanic 0.4616 0.5308 0.87 0.385
## raceAsian 0.5186 1.0014 0.52 0.605
## raceNative -0.6580 1.8853 -0.35 0.727
## age 0.1094 0.1521 0.72 0.472
## g:religiousness -0.0955 0.1358 -0.70 0.482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 29
## Log-likelihood: -7.29e+03 on 18 Df
##
## $hurdle
##
## Call:
## pscl::hurdle(formula = fertility ~ g * religiousness + race + age, data = d_z_notfert)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.5485 -0.6656 0.0543 0.5765 6.6910
##
## Count model coefficients (truncated poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6271 0.0155 40.33 < 2e-16 ***
## g -0.0186 0.0141 -1.33 0.1848
## religiousness 0.0930 0.0139 6.67 2.5e-11 ***
## raceBlack 0.1827 0.0409 4.46 8.1e-06 ***
## raceHispanic 0.2509 0.0566 4.43 9.4e-06 ***
## raceAsian 0.0500 0.1592 0.31 0.7533
## raceNative 0.0228 0.1287 0.18 0.8591
## age 0.0401 0.0134 2.99 0.0028 **
## g:religiousness 0.0341 0.0131 2.61 0.0090 **
## Zero hurdle model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3780 0.0425 32.41 < 2e-16 ***
## g -0.1204 0.0402 -2.99 0.0028 **
## religiousness 0.2730 0.0393 6.94 3.9e-12 ***
## raceBlack 0.2196 0.1417 1.55 0.1211
## raceHispanic 0.2513 0.2056 1.22 0.2218
## raceAsian -0.0884 0.4129 -0.21 0.8304
## raceNative 0.4104 0.4150 0.99 0.3227
## age 0.0280 0.0384 0.73 0.4659
## g:religiousness 0.0593 0.0373 1.59 0.1120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 15
## Log-likelihood: -7.29e+03 on 18 Df
#visualize
p_poisson = ggpredict(count_models$poisson, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous("Number of children at age 38") +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_poisson

p_zeroinf = ggpredict(count_models$zeroinflation, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous(NULL) +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_zeroinf

p_count = ggpredict(count_models$hurdle, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>%
plot() +
scale_x_continuous("g") +
scale_y_continuous(NULL) +
ggtitle(NULL)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_count

(p_poisson + theme(legend.position = "none")) + (p_zeroinf + theme(legend.position = "none")) + p_count

GG_save("figs/count_models.png")