Init

#options
options(
  digits = 3
)

# library(renv)
#devtools::install_version("broom", "0.5.6")

#packages
library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  rms, 
  mirt,
  readxl,
  lavaan,
  ggeffects,
  pscl,
  patchwork
  )

## Loading required package: stats4
## Loading required package: lattice
## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov
## 
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

#set the theme for all plots
theme_set(theme_bw())

Functions

#residual slope to get standardized value
resid_slope = function(x) {sqrt(1-x^2)}
resid_slope(.71)

## [1] 0.704

resid_slope(1)

## [1] 0

resid_slope(0)

## [1] 1

resid_slope(.5)

## [1] 0.866

sqrt(1 - (.5^2))

## [1] 0.866

Data

Read the data from various files.

#datasets
d = read_rds("data/VES merged.rds")
d_vars = read_csv("data/VES_dataset_variables.csv")

## Rows: 3341 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): code, name
## dbl (1): number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

MMPI_items = d %>% select(MM010001:MM010566)
MMPI_questions = read_excel("data/MMPI-1 items.xlsx")
cog_items = read_tsv("data/VES_items_binary.tsv")

## Rows: 4462 Columns: 202
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (202): WAIS_R_INF_ITEM_1, WAIS_R_INF_ITEM_2, WAIS_R_INF_ITEM_3, WAIS_R_I...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# ves_more_data = read_excel("data/race_testis_semen_fertility_Emil.xlsx") #broken file
dysgenics2 = readxl::read_excel("data/Aldo_dysgenics_VES.xlsx")

#standardize some variables
std_vars = c("education", "income")
for (v in std_vars) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")

#fertility
d$fertility = dysgenics2$`TOTBRTH1 (TOTAL [ BIRTHS VETERAN FATHERED)`
d$fertility_confirmed = dysgenics2$`TOTBRTH2 (TOTAL [ QUALIFIED BIRTHS VET FATHERED)`

Analyses

Descriptives

#race distribution
d$race %>% table2()

#age summary stats
d$age %>% describe2()

d$fertility %>% describe2()

d %>% 
  ggplot(aes(fertility)) +
  geom_bar(aes(y = after_stat(count)/sum(after_stat(count)))) +
  scale_x_continuous("Number of children (self-report at age 38)", breaks = seq(0, 100)) +
  scale_y_continuous("Percentage", labels = scales::percent)

## Warning: Removed 7 rows containing non-finite values (`stat_count()`).

GG_save("figs/dist_fertility.png")

## Warning: Removed 7 rows containing non-finite values (`stat_count()`).

Religiousness

Select items, factor analyze score.

#items and nice names
#these were suitable items that found in a search for god, and relig
#Dutton says:
#27;  50;  53;  58;  95;  98;  115;  209;  249;  258;  369;  476;  483;  490;  491
#and, debatably, 106 and 202
#Emil picks
religion_items = c(
  priest_healing = 53,
  prophets_accurate = 58,
  church_almost_every_week = 95,
  believe_in_second_life = 115,
  very_religious = 206,
  belief_in_devil_and_hell = 249,
  belief_in_god = 258,
  only_one_true_religion = 373,
  special_agent_god = 476,
  christ_miracles = 483,
  read_bible_often = 490,
  no_patience_people_true_believers = 491
)

#prevalence (proportion TRUE)
MMPI_questions$prevalence = colMeans(MMPI_items, na.rm = T)
MMPI_questions$missing = miss_by_var(MMPI_items, prop = T)

#our subset
MMPI_questions[religion_items, ]

#copy to main
for (i in seq_along(religion_items)) {
  d[[names(religion_items)[i]]] = MMPI_items[[religion_items[i]]]
}

#religion data
religion_data = MMPI_items[religion_items] %>% 
  set_names(names(religion_items))

#missing
religion_data_miss = miss_by_case(religion_data, reverse = T)

#correlations
religion_data %>% mixedCor()

## Call: mixedCor(data = .)
##                                   prst_ prph_ ch___ bl___ vry_r b____ blf__
## priest_healing                     1.00                                    
## prophets_accurate                  0.57  1.00                              
## church_almost_every_week           0.33  0.51  1.00                        
## believe_in_second_life             0.40  0.53  0.50  1.00                  
## very_religious                     0.39  0.53  0.68  0.44  1.00            
## belief_in_devil_and_hell           0.45  0.63  0.52  0.74  0.46  1.00      
## belief_in_god                      0.38  0.66  0.57  0.82  0.58  0.85  1.00
## only_one_true_religion             0.36  0.59  0.41  0.36  0.52  0.53  0.58
## special_agent_god                  0.34  0.45  0.38  0.35  0.55  0.35  0.34
## christ_miracles                    0.48  0.56  0.55  0.67  0.50  0.70  0.84
## read_bible_often                   0.49  0.69  0.79  0.48  0.76  0.54  0.56
## no_patience_people_true_believers -0.17 -0.29 -0.30 -0.23 -0.27 -0.31 -0.29
##                                   on___ spc__ chrs_ rd_b_
## priest_healing                                           
## prophets_accurate                                        
## church_almost_every_week                                 
## believe_in_second_life                                   
## very_religious                                           
## belief_in_devil_and_hell                                 
## belief_in_god                                            
## only_one_true_religion             1.00                  
## special_agent_god                  0.43  1.00            
## christ_miracles                    0.45  0.33  1.00      
## read_bible_often                   0.57  0.54  0.57  1.00
## no_patience_people_true_believers -0.43 -0.09 -0.25 -0.34
## [1]  1.00

#MIRT
religion_fit = mirt(religion_data, model = 1, technical = list(removeEmptyRows = T))

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## 
Iteration: 1, Log-Lik: -24018.628, Max-Change: 1.29602
Iteration: 2, Log-Lik: -22252.825, Max-Change: 1.15131
Iteration: 3, Log-Lik: -21852.463, Max-Change: 1.10645
Iteration: 4, Log-Lik: -21769.643, Max-Change: 0.38262
Iteration: 5, Log-Lik: -21734.538, Max-Change: 0.40918
Iteration: 6, Log-Lik: -21716.733, Max-Change: 0.41660
Iteration: 7, Log-Lik: -21705.239, Max-Change: 0.24595
Iteration: 8, Log-Lik: -21699.488, Max-Change: 0.16613
Iteration: 9, Log-Lik: -21696.375, Max-Change: 0.08014
Iteration: 10, Log-Lik: -21695.420, Max-Change: 0.06825
Iteration: 11, Log-Lik: -21694.184, Max-Change: 0.06579
Iteration: 12, Log-Lik: -21693.506, Max-Change: 0.03398
Iteration: 13, Log-Lik: -21693.123, Max-Change: 0.01961
Iteration: 14, Log-Lik: -21692.941, Max-Change: 0.01109
Iteration: 15, Log-Lik: -21692.840, Max-Change: 0.00900
Iteration: 16, Log-Lik: -21692.697, Max-Change: 0.00249
Iteration: 17, Log-Lik: -21692.693, Max-Change: 0.00122
Iteration: 18, Log-Lik: -21692.690, Max-Change: 0.00116
Iteration: 19, Log-Lik: -21692.683, Max-Change: 0.00029
Iteration: 20, Log-Lik: -21692.683, Max-Change: 0.00028
Iteration: 21, Log-Lik: -21692.683, Max-Change: 0.00025
Iteration: 22, Log-Lik: -21692.682, Max-Change: 0.00019
Iteration: 23, Log-Lik: -21692.682, Max-Change: 0.00093
Iteration: 24, Log-Lik: -21692.682, Max-Change: 0.00044
Iteration: 25, Log-Lik: -21692.682, Max-Change: 0.00021
Iteration: 26, Log-Lik: -21692.682, Max-Change: 0.00077
Iteration: 27, Log-Lik: -21692.682, Max-Change: 0.00067
Iteration: 28, Log-Lik: -21692.682, Max-Change: 0.00020
Iteration: 29, Log-Lik: -21692.682, Max-Change: 0.00069
Iteration: 30, Log-Lik: -21692.682, Max-Change: 0.00064
Iteration: 31, Log-Lik: -21692.681, Max-Change: 0.00018
Iteration: 32, Log-Lik: -21692.681, Max-Change: 0.00063
Iteration: 33, Log-Lik: -21692.681, Max-Change: 0.00057
Iteration: 34, Log-Lik: -21692.681, Max-Change: 0.00016
Iteration: 35, Log-Lik: -21692.681, Max-Change: 0.00058
Iteration: 36, Log-Lik: -21692.681, Max-Change: 0.00051
Iteration: 37, Log-Lik: -21692.681, Max-Change: 0.00015
Iteration: 38, Log-Lik: -21692.681, Max-Change: 0.00054
Iteration: 39, Log-Lik: -21692.681, Max-Change: 0.00047
Iteration: 40, Log-Lik: -21692.681, Max-Change: 0.00014
Iteration: 41, Log-Lik: -21692.681, Max-Change: 0.00050
Iteration: 42, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 43, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 44, Log-Lik: -21692.681, Max-Change: 0.00046
Iteration: 45, Log-Lik: -21692.681, Max-Change: 0.00042
Iteration: 46, Log-Lik: -21692.681, Max-Change: 0.00013
Iteration: 47, Log-Lik: -21692.681, Max-Change: 0.00044
Iteration: 48, Log-Lik: -21692.681, Max-Change: 0.00039
Iteration: 49, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 50, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 51, Log-Lik: -21692.681, Max-Change: 0.00037
Iteration: 52, Log-Lik: -21692.681, Max-Change: 0.00012
Iteration: 53, Log-Lik: -21692.681, Max-Change: 0.00043
Iteration: 54, Log-Lik: -21692.681, Max-Change: 0.00036
Iteration: 55, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 56, Log-Lik: -21692.680, Max-Change: 0.00042
Iteration: 57, Log-Lik: -21692.680, Max-Change: 0.00034
Iteration: 58, Log-Lik: -21692.680, Max-Change: 0.00011
Iteration: 59, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 60, Log-Lik: -21692.680, Max-Change: 0.00032
Iteration: 61, Log-Lik: -21692.680, Max-Change: 0.00010
Iteration: 62, Log-Lik: -21692.680, Max-Change: 0.00041
Iteration: 63, Log-Lik: -21692.680, Max-Change: 0.00031
Iteration: 64, Log-Lik: -21692.680, Max-Change: 0.00010

religion_fit

## 
## Call:
## mirt(data = religion_data, model = 1, technical = list(removeEmptyRows = T))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 64 EM iterations.
## mirt version: 1.39 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -21693
## Estimated parameters: 24 
## AIC = 43433
## BIC = 43587; SABIC = 43511

religion_fit %>% summary()

##                                       F1    h2
## priest_healing                     0.599 0.359
## prophets_accurate                  0.757 0.573
## church_almost_every_week           0.763 0.582
## believe_in_second_life             0.779 0.607
## very_religious                     0.787 0.619
## belief_in_devil_and_hell           0.830 0.688
## belief_in_god                      0.954 0.909
## only_one_true_religion             0.649 0.421
## special_agent_god                  0.603 0.364
## christ_miracles                    0.811 0.658
## read_bible_often                   0.913 0.834
## no_patience_people_true_believers -0.360 0.130
## 
## SS loadings:  6.74 
## Proportion Var:  0.562 
## 
## Factor correlations: 
## 
##    F1
## F1  1

religion_fit_scores = fscores(religion_fit, full.scores = T, full.scores.SE = T)
d$religiousness = NA
d$religiousness[religion_data_miss != 0] = religion_fit_scores[, 1]
d$religiousness %<>% standardize(focal_group = (d$race == "White"))

#distribution
d$religiousness %>% GG_denhist() +
  scale_x_continuous("Religiousness (factor score)")

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/religiousness_dist.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#psych package (just for verification)
religion_fit_psych = irt.fa(religion_data)

religion_fit_psych$fa

## Factor Analysis using method =  minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                     MR1   h2   u2 com
## priest_healing                     0.56 0.31 0.69   1
## prophets_accurate                  0.78 0.61 0.39   1
## church_almost_every_week           0.73 0.53 0.47   1
## believe_in_second_life             0.73 0.53 0.47   1
## very_religious                     0.73 0.54 0.46   1
## belief_in_devil_and_hell           0.81 0.65 0.35   1
## belief_in_god                      0.87 0.76 0.24   1
## only_one_true_religion             0.67 0.44 0.56   1
## special_agent_god                  0.53 0.28 0.72   1
## christ_miracles                    0.79 0.62 0.38   1
## read_bible_often                   0.82 0.68 0.32   1
## no_patience_people_true_believers -0.37 0.14 0.86   1
## 
##                 MR1
## SS loadings    6.10
## Proportion Var 0.51
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  66  with the objective function =  9.62 with Chi Square =  42859
## df of  the model are 54  and the objective function was  3.15 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic n.obs is  4462 with the empirical chi square  4638  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  14046  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.6
## RMSEA index =  0.241  and the 90 % confidence intervals are  0.238 0.244
## BIC =  13592
## Fit based upon off diagonal values = 0.97
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.89

religion_fit_psych_scores = scoreIrt(religion_fit_psych, religion_data)

d$religiousness2 = religion_fit_psych_scores[, 1]
d$religiousness2 %<>% standardize(focal_group = (d$race == "White"))

#scores match?
cor.test(d$religiousness, d$religiousness2)

## 
##  Pearson's product-moment correlation
## 
## data:  d$religiousness and d$religiousness2
## t = 379, df = 4460, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.984 0.986
## sample estimates:
##   cor 
## 0.985

ggplot(d, aes(religiousness, religiousness2)) +
  geom_count() +
  geom_smooth()

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#reliability
alpha(religion_data, check.keys=TRUE)

## Warning in alpha(religion_data, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
##  This is indicated by a negative sign for the variable name.

## 
## Reliability analysis   
## Call: alpha(x = religion_data, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.79      0.79     0.8      0.24 3.8 0.0044 0.45 0.22      0.2
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.78  0.79   0.8
## Duhachek  0.78  0.79   0.8
## 
##  Reliability if an item is dropped:
##                                    raw_alpha std.alpha G6(smc) average_r S/N
## priest_healing                          0.79      0.79    0.80      0.26 3.8
## prophets_accurate                       0.76      0.76    0.78      0.23 3.2
## church_almost_every_week                0.77      0.77    0.78      0.23 3.4
## believe_in_second_life                  0.77      0.77    0.78      0.23 3.4
## very_religious                          0.78      0.77    0.78      0.24 3.4
## belief_in_devil_and_hell                0.76      0.76    0.77      0.23 3.2
## belief_in_god                           0.78      0.78    0.78      0.24 3.5
## only_one_true_religion                  0.77      0.77    0.78      0.24 3.4
## special_agent_god                       0.79      0.79    0.80      0.25 3.8
## christ_miracles                         0.77      0.77    0.78      0.23 3.3
## read_bible_often                        0.77      0.77    0.78      0.23 3.3
## no_patience_people_true_believers-      0.80      0.79    0.80      0.26 3.9
##                                    alpha se var.r med.r
## priest_healing                       0.0045 0.014  0.24
## prophets_accurate                    0.0051 0.015  0.19
## church_almost_every_week             0.0048 0.014  0.20
## believe_in_second_life               0.0048 0.013  0.20
## very_religious                       0.0048 0.014  0.20
## belief_in_devil_and_hell             0.0051 0.013  0.20
## belief_in_god                        0.0047 0.012  0.23
## only_one_true_religion               0.0048 0.016  0.19
## special_agent_god                    0.0045 0.014  0.24
## christ_miracles                      0.0049 0.013  0.20
## read_bible_often                     0.0048 0.013  0.20
## no_patience_people_true_believers-   0.0043 0.014  0.24
## 
##  Item statistics 
##                                       n raw.r std.r r.cor r.drop  mean   sd
## priest_healing                     4419  0.37  0.41  0.31   0.27 0.083 0.28
## prophets_accurate                  4292  0.67  0.65  0.61   0.55 0.505 0.50
## church_almost_every_week           4461  0.60  0.59  0.55   0.48 0.250 0.43
## believe_in_second_life             4410  0.60  0.59  0.55   0.48 0.756 0.43
## very_religious                     4449  0.56  0.58  0.53   0.46 0.171 0.38
## belief_in_devil_and_hell           4413  0.68  0.66  0.63   0.57 0.630 0.48
## belief_in_god                      4426  0.51  0.53  0.48   0.42 0.915 0.28
## only_one_true_religion             4423  0.61  0.58  0.53   0.47 0.374 0.48
## special_agent_god                  4442  0.38  0.42  0.32   0.28 0.089 0.28
## christ_miracles                    4328  0.61  0.61  0.57   0.50 0.766 0.42
## read_bible_often                   4452  0.58  0.60  0.57   0.49 0.126 0.33
## no_patience_people_true_believers- 4429  0.40  0.38  0.27   0.25 0.734 0.44
## 
## Non missing response frequency for each item
##                                      0    1 miss
## priest_healing                    0.92 0.08 0.01
## prophets_accurate                 0.50 0.50 0.04
## church_almost_every_week          0.75 0.25 0.00
## believe_in_second_life            0.24 0.76 0.01
## very_religious                    0.83 0.17 0.00
## belief_in_devil_and_hell          0.37 0.63 0.01
## belief_in_god                     0.08 0.92 0.01
## only_one_true_religion            0.63 0.37 0.01
## special_agent_god                 0.91 0.09 0.00
## christ_miracles                   0.23 0.77 0.03
## read_bible_often                  0.87 0.13 0.00
## no_patience_people_true_believers 0.73 0.27 0.01

empirical_rxx(religion_fit_scores)

##    F1 
## 0.836

marginal_rxx(religion_fit)

## [1] 0.834

plot(religion_fit, type = "rxx")

# save_plot_to_file(
#   plot(religion_fit, type = "rxx"), 
#   filename = "figs/religiousness_reliability.png")

#religiousness by race
GG_group_means(d, "religiousness", "race", type = "boxplot")

SMD_matrix(-d$religiousness, d$race, extended_output = T)

## $d
##            White  Black Hispanic  Asian  Native
## White         NA  0.352   0.1544 -0.209  0.0663
## Black     0.3522     NA  -0.1978 -0.561 -0.2859
## Hispanic  0.1544 -0.198       NA -0.364 -0.0881
## Asian    -0.2091 -0.561  -0.3635     NA  0.2755
## Native    0.0663 -0.286  -0.0881  0.275      NA
## 
## $d_string
##          White                Black                 Hispanic             
## White    NA                   "0.35 [0.26 0.44]"    "0.15 [0.01 0.30]"   
## Black    "0.35 [0.26 0.44]"   NA                    "-0.20 [-0.36 -0.03]"
## Hispanic "0.15 [0.01 0.30]"   "-0.20 [-0.36 -0.03]" NA                   
## Asian    "-0.21 [-0.55 0.13]" "-0.56 [-0.91 -0.21]" "-0.36 [-0.73 0.00]" 
## Native   "0.07 [-0.22 0.35]"  "-0.29 [-0.58 0.01]"  "-0.09 [-0.40 0.22]" 
##          Asian                 Native              
## White    "-0.21 [-0.55 0.13]"  "0.07 [-0.22 0.35]" 
## Black    "-0.56 [-0.91 -0.21]" "-0.29 [-0.58 0.01]"
## Hispanic "-0.36 [-0.73 0.00]"  "-0.09 [-0.40 0.22]"
## Asian    NA                    "0.28 [-0.16 0.71]" 
## Native   "0.28 [-0.16 0.71]"   NA                  
## 
## $CI_lower
##           White  Black Hispanic  Asian Native
## White        NA  0.260    0.012 -0.547 -0.216
## Black     0.260     NA   -0.361 -0.910 -0.579
## Hispanic  0.012 -0.361       NA -0.729 -0.401
## Asian    -0.547 -0.910   -0.729     NA -0.164
## Native   -0.216 -0.579   -0.401 -0.164     NA
## 
## $CI_upper
##          White    Black Hispanic    Asian  Native
## White       NA  0.44403  0.29676  0.12858 0.34820
## Black    0.444       NA -0.03467 -0.21297 0.00733
## Hispanic 0.297 -0.03467       NA  0.00155 0.22446
## Asian    0.129 -0.21297  0.00155       NA 0.71500
## Native   0.348  0.00733  0.22446  0.71500      NA
## 
## $se_z
## [1] 1.96
## 
## $se
##           White  Black Hispanic Asian Native
## White        NA 0.0468   0.0726 0.172  0.144
## Black    0.0468     NA   0.0833 0.178  0.150
## Hispanic 0.0726 0.0833       NA 0.186  0.159
## Asian    0.1723 0.1778   0.1863    NA  0.224
## Native   0.1438 0.1496   0.1594 0.224     NA
## 
## $p
##             White    Black Hispanic   Asian Native
## White          NA 5.43e-14   0.0336 0.22485  0.645
## Black    5.43e-14       NA   0.0175 0.00159  0.056
## Hispanic 3.36e-02 1.75e-02       NA 0.05098  0.581
## Asian    2.25e-01 1.59e-03   0.0510      NA  0.219
## Native   6.45e-01 5.60e-02   0.5808 0.21929     NA
## 
## $pairwise_n
##          White Black Hispanic Asian Native
## White       NA  4179     3854  3688   3703
## Black     4179    NA      725   559    574
## Hispanic  3854   725       NA   234    249
## Asian     3688   559      234    NA     83
## Native    3703   574      249    83     NA

Intelligence

#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)

#impute missing data
g_test_data_imp = d[g_tests] %>% miss_impute()

#standardize test variables
for (v in g_tests) d[[v]] = d[[v]] %>% standardize(focal_group = d$race == "White")

#all tests g
fa_g = fa(g_test_data_imp)
fa_g

## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time1       0.82 0.67 0.33   1
## AR_time1       0.81 0.66 0.34   1
## PA             0.71 0.50 0.50   1
## GIT            0.69 0.48 0.52   1
## AFQT           0.85 0.73 0.27   1
## VE_time2       0.82 0.67 0.33   1
## AR_time2       0.82 0.67 0.33   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.76 0.58 0.42   1
## WRAT           0.73 0.53 0.47   1
## PASAT          0.57 0.32 0.68   1
## WLGT           0.49 0.24 0.76   1
## copy_direct    0.47 0.22 0.78   1
## copy_immediate 0.55 0.30 0.70   1
## copy_delayed   0.55 0.30 0.70   1
## CVLT           0.42 0.18 0.82   1
## WCST           0.46 0.21 0.79   1
## GPT_left       0.34 0.12 0.88   1
## GPT_right      0.33 0.11 0.89   1
## 
##                 MR1
## SS loadings    7.94
## Proportion Var 0.42
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  171  with the objective function =  12.6 with Chi Square =  56117
## df of  the model are 152  and the objective function was  3.89 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic n.obs is  4462 with the empirical chi square  13120  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  17324  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.655
## RMSEA index =  0.159  and the 90 % confidence intervals are  0.157 0.161
## BIC =  16047
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.89

#earlier tests only
fa_g_time1 = fa(g_test_data_imp[g_tests_early])
fa_g_time1

## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
##           MR1   h2   u2 com
## VE_time1 0.82 0.67 0.33   1
## AR_time1 0.82 0.68 0.32   1
## PA       0.70 0.49 0.51   1
## GIT      0.73 0.53 0.47   1
## AFQT     0.92 0.85 0.15   1
## 
##                 MR1
## SS loadings    3.22
## Proportion Var 0.64
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  10  with the objective function =  3.14 with Chi Square =  13984
## df of  the model are 5  and the objective function was  0.16 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic n.obs is  4462 with the empirical chi square  175  with prob <  6.6e-36 
## The total n.obs was  4462  with Likelihood Chi Square =  711  with prob <  1.7e-151 
## 
## Tucker Lewis Index of factoring reliability =  0.899
## RMSEA index =  0.178  and the 90 % confidence intervals are  0.167 0.189
## BIC =  669
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.96
## Multiple R square of scores with factors          0.93
## Minimum correlation of possible factor scores     0.85

#later tests only
fa_g_time2 = fa(g_test_data_imp[g_tests_later])
fa_g_time2

## Factor Analysis using method =  minres
## Call: fa(r = g_test_data_imp[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time2       0.78 0.61 0.39   1
## AR_time2       0.79 0.62 0.38   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.72 0.52 0.48   1
## WRAT           0.71 0.50 0.50   1
## PASAT          0.58 0.33 0.67   1
## WLGT           0.50 0.25 0.75   1
## copy_direct    0.51 0.26 0.74   1
## copy_immediate 0.61 0.37 0.63   1
## copy_delayed   0.61 0.37 0.63   1
## CVLT           0.45 0.20 0.80   1
## WCST           0.47 0.22 0.78   1
## GPT_left       0.38 0.14 0.86   1
## GPT_right      0.37 0.13 0.87   1
## 
##                 MR1
## SS loadings    5.01
## Proportion Var 0.36
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  91  with the objective function =  7.17 with Chi Square =  31949
## df of  the model are 77  and the objective function was  2.8 
## 
## The root mean square of the residuals (RMSR) is  0.11 
## The df corrected root mean square of the residuals is  0.12 
## 
## The harmonic n.obs is  4462 with the empirical chi square  9397  with prob <  0 
## The total n.obs was  4462  with Likelihood Chi Square =  12486  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.54
## RMSEA index =  0.19  and the 90 % confidence intervals are  0.187 0.193
## BIC =  11839
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.95
## Multiple R square of scores with factors          0.90
## Minimum correlation of possible factor scores     0.81

#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")

#IQ scale
d$IQ = d$g * 15 + 100

#plot dist
GG_denhist(d, "g", vline = F)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/gdist.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#reliability
alpha(g_test_data_imp)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = g_test_data_imp)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.85      0.92    0.95      0.39  12 0.0023   42 9.6     0.34
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.85  0.85  0.86
## Duhachek  0.85  0.85  0.86
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## VE_time1            0.83      0.92    0.94      0.38  11   0.0026 0.026  0.34
## AR_time1            0.83      0.92    0.94      0.38  11   0.0027 0.027  0.33
## PA                  0.84      0.92    0.94      0.38  11   0.0025 0.029  0.33
## GIT                 0.84      0.92    0.94      0.39  11   0.0025 0.029  0.34
## AFQT                0.86      0.92    0.94      0.38  11   0.0023 0.027  0.33
## VE_time2            0.83      0.92    0.94      0.38  11   0.0027 0.026  0.34
## AR_time2            0.83      0.92    0.94      0.38  11   0.0028 0.027  0.33
## WAIS_BD             0.85      0.92    0.94      0.39  11   0.0023 0.030  0.33
## WAIS_GI             0.85      0.92    0.94      0.38  11   0.0023 0.028  0.33
## WRAT                0.84      0.92    0.94      0.38  11   0.0025 0.028  0.34
## PASAT               0.88      0.92    0.94      0.39  12   0.0017 0.031  0.34
## WLGT                0.85      0.92    0.95      0.40  12   0.0024 0.030  0.36
## copy_direct         0.85      0.92    0.95      0.40  12   0.0023 0.030  0.36
## copy_immediate      0.85      0.92    0.94      0.39  12   0.0024 0.029  0.36
## copy_delayed        0.85      0.92    0.94      0.39  12   0.0023 0.029  0.36
## CVLT                0.85      0.92    0.95      0.40  12   0.0023 0.029  0.37
## WCST                0.86      0.92    0.95      0.40  12   0.0023 0.030  0.37
## GPT_left            0.85      0.93    0.94      0.41  12   0.0023 0.027  0.37
## GPT_right           0.85      0.93    0.94      0.41  12   0.0023 0.027  0.37
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop   mean    sd
## VE_time1       4462  0.81  0.79  0.80   0.76 107.14 22.20
## AR_time1       4462  0.83  0.79  0.79   0.78 104.38 21.94
## PA             4462  0.69  0.72  0.71   0.62 104.26 22.56
## GIT            4462  0.70  0.69  0.67   0.64 102.02 18.33
## AFQT           4462  0.80  0.83  0.84   0.80   0.14  0.81
## VE_time2       4462  0.82  0.80  0.81   0.77 116.52 23.04
## AR_time2       4462  0.84  0.81  0.81   0.79 104.56 24.40
## WAIS_BD        4462  0.63  0.71  0.69   0.63  10.52  2.64
## WAIS_GI        4462  0.72  0.75  0.74   0.72  10.07  2.80
## WRAT           4462  0.75  0.73  0.72   0.71  61.16 14.74
## PASAT          4462  0.74  0.61  0.57   0.56 108.72 50.72
## WLGT           4462  0.53  0.53  0.49   0.49  35.12 10.92
## copy_direct    4462  0.44  0.54  0.49   0.43  32.73  3.31
## copy_immediate 4462  0.50  0.61  0.62   0.47  20.11  6.75
## copy_delayed   4462  0.50  0.61  0.62   0.47  20.27  6.33
## CVLT           4462  0.42  0.48  0.43   0.40  11.06  2.33
## WCST           4462  0.44  0.51  0.46   0.44   0.79  0.17
## GPT_left       4462  0.40  0.43  0.39   0.33 -77.38 13.76
## GPT_right      4462  0.39  0.42  0.38   0.33 -73.68 11.84

#method variance check
fa_g_all_methods = fa_all_methods(g_test_data_imp)

## 1 out of 40 - regression_minres
## Saving results from regression_minres
## 2 out of 40 - Thurstone_minres
## Saving results from Thurstone_minres
## 3 out of 40 - tenBerge_minres
## Saving results from tenBerge_minres
## 4 out of 40 - Anderson_minres
## Skipping Anderson_minres due to extraction error
## 5 out of 40 - Bartlett_minres
## Saving results from Bartlett_minres
## 6 out of 40 - regression_ols
## Saving results from regression_ols
## 7 out of 40 - Thurstone_ols
## Saving results from Thurstone_ols
## 8 out of 40 - tenBerge_ols
## Saving results from tenBerge_ols
## 9 out of 40 - Anderson_ols
## Skipping Anderson_ols due to extraction error
## 10 out of 40 - Bartlett_ols
## Saving results from Bartlett_ols
## 11 out of 40 - regression_wls
## Saving results from regression_wls
## 12 out of 40 - Thurstone_wls
## Saving results from Thurstone_wls
## 13 out of 40 - tenBerge_wls
## Saving results from tenBerge_wls
## 14 out of 40 - Anderson_wls
## Skipping Anderson_wls due to extraction error
## 15 out of 40 - Bartlett_wls
## Saving results from Bartlett_wls
## 16 out of 40 - regression_gls
## Saving results from regression_gls
## 17 out of 40 - Thurstone_gls
## Saving results from Thurstone_gls
## 18 out of 40 - tenBerge_gls
## Saving results from tenBerge_gls
## 19 out of 40 - Anderson_gls
## Skipping Anderson_gls due to extraction error
## 20 out of 40 - Bartlett_gls
## Saving results from Bartlett_gls
## 21 out of 40 - regression_pa
## Saving results from regression_pa
## 22 out of 40 - Thurstone_pa
## Saving results from Thurstone_pa
## 23 out of 40 - tenBerge_pa
## Saving results from tenBerge_pa
## 24 out of 40 - Anderson_pa
## Skipping Anderson_pa due to extraction error
## 25 out of 40 - Bartlett_pa
## Saving results from Bartlett_pa
## 26 out of 40 - regression_ml
## Saving results from regression_ml
## 27 out of 40 - Thurstone_ml
## Saving results from Thurstone_ml
## 28 out of 40 - tenBerge_ml
## Saving results from tenBerge_ml
## 29 out of 40 - Anderson_ml
## Skipping Anderson_ml due to extraction error
## 30 out of 40 - Bartlett_ml
## Saving results from Bartlett_ml
## 31 out of 40 - regression_minchi
## Saving results from regression_minchi
## 32 out of 40 - Thurstone_minchi
## Saving results from Thurstone_minchi
## 33 out of 40 - tenBerge_minchi
## Saving results from tenBerge_minchi
## 34 out of 40 - Anderson_minchi
## Skipping Anderson_minchi due to extraction error
## 35 out of 40 - Bartlett_minchi
## Saving results from Bartlett_minchi
## 36 out of 40 - regression_minrank
## Saving results from regression_minrank
## 37 out of 40 - Thurstone_minrank
## Saving results from Thurstone_minrank
## 38 out of 40 - tenBerge_minrank
## Saving results from tenBerge_minrank
## 39 out of 40 - Anderson_minrank
## Skipping Anderson_minrank due to extraction error
## 40 out of 40 - Bartlett_minrank
## Saving results from Bartlett_minrank

fa_g_all_methods$scores %>% GG_heatmap()

GG_save("figs/fa_methods.png")

fa_g_all_methods$scores %>% 
  cor() %>% 
  MAT_half(diag = F) %>% 
  describe2()

Main results

#correlations
d %>% select(fertility, g, religiousness, education, income) %>% 
  cor_matrix(p_val = T)

##               fertility  g          religiousness education  income    
## fertility     "1"        "-0.11***" "0.17***"     "-0.10***" "-0.02"   
## g             "-0.11***" "1"        "-0.18***"    "0.55***"  "0.40***" 
## religiousness "0.17***"  "-0.18***" "1"           "-0.09***" "-0.11***"
## education     "-0.10***" "0.55***"  "-0.09***"    "1"        "0.35***" 
## income        "-0.02"    "0.40***"  "-0.11***"    "0.35***"  "1"

d %>% select(fertility, g, religiousness, education, income) %>% 
  polycor::hetcor()

## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##               fertility       g religiousness education  income
## fertility             1 Pearson       Pearson   Pearson Pearson
## g                -0.108       1       Pearson   Pearson Pearson
## religiousness     0.174  -0.184             1   Pearson Pearson
## education        -0.105   0.545       -0.0907         1 Pearson
## income          -0.0212   0.397        -0.108     0.349       1
## 
## Standard Errors:
##               fertility      g religiousness education
## fertility                                             
## g                0.0149                               
## religiousness    0.0147 0.0146                        
## education         0.015 0.0106         0.015          
## income           0.0151 0.0127        0.0149    0.0133
## 
## n = 4373 
## 
## P-values for Tests of Bivariate Normality:
##               fertility         g religiousness education
## fertility                                                
## g              7.49e-56                                  
## religiousness  5.62e-57  1.14e-29                        
## education      1.5e-290 6.04e-257     1.74e-258          
## income         1.78e-69  4.34e-25      1.17e-27 9.71e-266

#is relationship linear?
GG_scatter(d, "religiousness", "fertility") +
  geom_smooth()

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/religiousness_fertility.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#models
#standardize variables of interest
d_z = d %>% select(fertility, fertility_confirmed, religiousness, g, age, race) %>% df_standardize() %>% mutate(g = standardize(g, focal_group = (race == "White")))

## Skipped race because it is class factor

d_z_white = d_z %>% filter(race == "White")
d_z_black = d_z %>% filter(race == "Black")
d_z_bw = bind_rows(d_z_white, d_z_black) %>% mutate(race = fct_drop(race))

main_models = list(
  no_controls = ols(fertility ~ religiousness + g, data = d_z),
  controls = ols(fertility ~ religiousness + g + race + age, data = d_z),
  interact = ols(fertility ~ g * religiousness + race + age, data = d_z),
  g_spline = ols(fertility ~ rcs(g) * religiousness + race + age, data = d_z),
  reli_spline = ols(fertility ~ g * rcs(religiousness) + race + age, data = d_z),
  both_spline = ols(fertility ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
  white_interact = ols(fertility ~ g * religiousness + age, data = d_z_white),
  black_interact = ols(fertility ~ g * religiousness + age, data = d_z_black),
  race_3interact = ols(fertility ~ g * religiousness * race + age, data = d_z_bw),
  interact_noage = ols(fertility ~ g * religiousness + race, data = d_z)
)

#p values for model comparisons
#test linear interaction
lrtest(main_models[[2]], main_models[[3]])

## 
## Model 1: fertility ~ religiousness + g + race + age
## Model 2: fertility ~ g * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##    9.66988    1.00000    0.00187

#test nonlinear g effect
lrtest(main_models[[3]], main_models[[4]])

## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##      1.939      6.000      0.925

#test nonlinear religion effect
lrtest(main_models[[3]], main_models[[5]])

## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ g * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##      9.362      6.000      0.154

#test nonlinear for both
lrtest(main_models[[3]], main_models[[6]])

## 
## Model 1: fertility ~ g * religiousness + race + age
## Model 2: fertility ~ rcs(g) * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##     17.366     21.000      0.689

#summarise
main_models[c(1:3, 7:8)] %>% 
  summarize_models(asterisks_only = F)

#visualize
#interaction with controls
ggpredict(main_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (0 = White mean)") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction.png")

#whites
ggpredict(main_models$white_interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (0 = White mean)") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/g_fertility_interaction_whites.png")

#using unstandardized units
unstd_interact_model = ols(fertility ~ IQ * religiousness + age + race, data = d)
unstd_interact_model

## Frequencies of Missing Values Due to Each Variable
##     fertility            IQ religiousness           age          race 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ IQ * religiousness + age + race, data = d)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    222.56    R2       0.049    
## sigma1.3250    d.f.            8    R2 adj   0.047    
## d.f.   4446    Pr(> chi2) 0.0000    g        0.337    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -2.60305 -0.97301  0.07147  0.83996  9.53461 
## 
## 
##                    Coef    S.E.   t     Pr(>|t|)
## Intercept           1.2467 0.3223  3.87 0.0001  
## IQ                 -0.0041 0.0014 -2.99 0.0028  
## religiousness      -0.1918 0.1320 -1.45 0.1463  
## age                 0.0242 0.0079  3.06 0.0022  
## race=Black          0.3353 0.0672  4.99 <0.0001 
## race=Hispanic       0.4560 0.0979  4.66 <0.0001 
## race=Asian          0.0271 0.2284  0.12 0.9055  
## race=Native         0.1552 0.1928  0.80 0.4210  
## IQ * religiousness  0.0040 0.0013  3.11 0.0019

#visualize
#interaction with controls
ggpredict(unstd_interact_model, terms = c("IQ [70:130]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (100 = White mean)") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Count variable models

#standardize non-fertility variables
d_z_notfert = d %>% select(fertility, religiousness, g, age, race) %>% df_standardize(exclude = "fertility") %>% mutate(g = standardize(g, focal_group = (race == "White")))

## Skipped fertility because it is in the exclude list

## Skipped race because it is class factor

#hurdle models
count_models = list(
  poisson = glm(fertility ~ g * religiousness + race + age, data = d_z_notfert, family = "poisson"),
  zeroinflation = pscl::zeroinfl(fertility ~ g * religiousness + race + age, data = d_z_notfert),
  hurdle = pscl::hurdle(fertility ~ g * religiousness + race + age, data = d_z_notfert)
)

map(count_models, summary)

## $poisson
## 
## Call:
## glm(formula = fertility ~ g * religiousness + race + age, family = "poisson", 
##     data = d_z_notfert)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.5638     0.0126   44.78  < 2e-16 ***
## g                -0.0381     0.0115   -3.32  0.00089 ***
## religiousness     0.1174     0.0113   10.34  < 2e-16 ***
## raceBlack         0.1641     0.0349    4.70  2.6e-06 ***
## raceHispanic      0.2239     0.0493    4.54  5.6e-06 ***
## raceAsian         0.0113     0.1308    0.09  0.93094    
## raceNative        0.0844     0.1052    0.80  0.42214    
## age               0.0329     0.0111    2.97  0.00296 ** 
## g:religiousness   0.0402     0.0107    3.77  0.00016 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5431.5  on 4454  degrees of freedom
## Residual deviance: 5210.5  on 4446  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 14663
## 
## Number of Fisher Scoring iterations: 5
## 
## 
## $zeroinflation
## 
## Call:
## pscl::zeroinfl(formula = fertility ~ g * religiousness + race + age, 
##     data = d_z_notfert)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5594 -0.6607  0.0561  0.5751  6.6944 
## 
## Count model coefficients (poisson with log link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.6266     0.0155   40.31  < 2e-16 ***
## g                -0.0189     0.0142   -1.33   0.1841    
## religiousness     0.0937     0.0138    6.81  9.7e-12 ***
## raceBlack         0.1788     0.0407    4.39  1.1e-05 ***
## raceHispanic      0.2501     0.0569    4.39  1.1e-05 ***
## raceAsian         0.0533     0.1528    0.35   0.7274    
## raceNative        0.0588     0.1161    0.51   0.6128    
## age               0.0393     0.0135    2.90   0.0037 ** 
## g:religiousness   0.0279     0.0129    2.17   0.0303 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -2.8624     0.1915  -14.95   <2e-16 ***
## g                 0.2797     0.1752    1.60    0.110    
## religiousness    -0.3589     0.1526   -2.35    0.019 *  
## raceBlack         0.2719     0.4944    0.55    0.582    
## raceHispanic      0.4616     0.5308    0.87    0.385    
## raceAsian         0.5186     1.0014    0.52    0.605    
## raceNative       -0.6580     1.8853   -0.35    0.727    
## age               0.1094     0.1521    0.72    0.472    
## g:religiousness  -0.0955     0.1358   -0.70    0.482    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 29 
## Log-likelihood: -7.29e+03 on 18 Df
## 
## $hurdle
## 
## Call:
## pscl::hurdle(formula = fertility ~ g * religiousness + race + age, data = d_z_notfert)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5485 -0.6656  0.0543  0.5765  6.6910 
## 
## Count model coefficients (truncated poisson with log link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.6271     0.0155   40.33  < 2e-16 ***
## g                -0.0186     0.0141   -1.33   0.1848    
## religiousness     0.0930     0.0139    6.67  2.5e-11 ***
## raceBlack         0.1827     0.0409    4.46  8.1e-06 ***
## raceHispanic      0.2509     0.0566    4.43  9.4e-06 ***
## raceAsian         0.0500     0.1592    0.31   0.7533    
## raceNative        0.0228     0.1287    0.18   0.8591    
## age               0.0401     0.0134    2.99   0.0028 ** 
## g:religiousness   0.0341     0.0131    2.61   0.0090 ** 
## Zero hurdle model coefficients (binomial with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.3780     0.0425   32.41  < 2e-16 ***
## g                -0.1204     0.0402   -2.99   0.0028 ** 
## religiousness     0.2730     0.0393    6.94  3.9e-12 ***
## raceBlack         0.2196     0.1417    1.55   0.1211    
## raceHispanic      0.2513     0.2056    1.22   0.2218    
## raceAsian        -0.0884     0.4129   -0.21   0.8304    
## raceNative        0.4104     0.4150    0.99   0.3227    
## age               0.0280     0.0384    0.73   0.4659    
## g:religiousness   0.0593     0.0373    1.59   0.1120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 15 
## Log-likelihood: -7.29e+03 on 18 Df

#visualize
p_poisson = ggpredict(count_models$poisson, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous("Number of children at age 38") +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

p_poisson

p_zeroinf = ggpredict(count_models$zeroinflation, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous(NULL) +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

p_zeroinf

p_count = ggpredict(count_models$hurdle, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("g") +
  scale_y_continuous(NULL) +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

p_count

(p_poisson + theme(legend.position = "none")) + (p_zeroinf  + theme(legend.position = "none")) + p_count

GG_save("figs/count_models.png")

Alternative fertility variable

alt_fert_models = list(
  no_controls = ols(fertility_confirmed ~ religiousness + g, data = d_z),
  controls = ols(fertility_confirmed ~ religiousness + g + race + age, data = d_z),
  interact = ols(fertility_confirmed ~ g * religiousness + race + age, data = d_z),
  g_spline = ols(fertility_confirmed ~ rcs(g) * religiousness + race + age, data = d_z),
  reli_spline = ols(fertility_confirmed ~ g * rcs(religiousness) + race + age, data = d_z),
  both_spline = ols(fertility_confirmed ~ rcs(g )* rcs(religiousness) + race + age, data = d_z),
  white_interact = ols(fertility_confirmed ~ g * religiousness + age, data = d_z_white),
  black_interact = ols(fertility_confirmed ~ g * religiousness + age, data = d_z_black),
  race_3interact = ols(fertility_confirmed ~ g * religiousness * race + age, data = d_z_bw),
  interact_noage = ols(fertility_confirmed ~ g * religiousness + race, data = d_z)
)

#p values for model comparisons
#test linear interaction
lrtest(alt_fert_models[[2]], alt_fert_models[[3]])

## 
## Model 1: fertility_confirmed ~ religiousness + g + race + age
## Model 2: fertility_confirmed ~ g * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##    8.09640    1.00000    0.00444

#test nonlinear g effect
lrtest(alt_fert_models[[3]], alt_fert_models[[4]])

## 
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ rcs(g) * religiousness + race + age
## 
## L.R. Chisq       d.f.          P 
##      3.493      6.000      0.745

#test nonlinear religion effect
lrtest(alt_fert_models[[3]], alt_fert_models[[5]])

## 
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ g * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##      7.246      6.000      0.299

#test nonlinear for both
lrtest(alt_fert_models[[3]], alt_fert_models[[6]])

## 
## Model 1: fertility_confirmed ~ g * religiousness + race + age
## Model 2: fertility_confirmed ~ rcs(g) * rcs(religiousness) + race + age
## 
## L.R. Chisq       d.f.          P 
##     18.990     21.000      0.586

#summarise
alt_fert_models[c(1:3, 7:8)] %>% 
  summarize_models(asterisks_only = F)

#visualize
#interaction with controls
ggpredict(alt_fert_models$interact, terms = c("g [-2:2]", "religiousness [-2, -1, 0, 1, 2]")) %>% 
  plot() +
  scale_x_continuous("General intelligence (0 = White mean)") +
  scale_y_continuous("Number of children at age 38 (confirmed)") +
  ggtitle(NULL)

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Full model outputs

main_models

## $no_controls
## Frequencies of Missing Values Due to Each Variable
##     fertility religiousness             g 
##             7             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ religiousness + g, data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    163.77    R2       0.036    
## sigma0.9820    d.f.            2    R2 adj   0.036    
## d.f.   4452    Pr(> chi2) 0.0000    g        0.215    
## 
## Residuals
## 
##     Min      1Q  Median      3Q     Max 
## -1.8522 -0.7269  0.0395  0.6593  7.2161 
## 
## 
##               Coef    S.E.   t     Pr(>|t|)
## Intercept     -0.0142 0.0150 -0.95 0.3423  
## religiousness  0.1593 0.0150 10.64 <0.0001 
## g             -0.0731 0.0140 -5.23 <0.0001 
## 
## 
## $controls
## Frequencies of Missing Values Due to Each Variable
##     fertility religiousness             g          race           age 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ religiousness + g + race + age, data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    212.89    R2       0.047    
## sigma0.9772    d.f.            7    R2 adj   0.045    
## d.f.   4447    Pr(> chi2) 0.0000    g        0.244    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.93412 -0.72300  0.05617  0.60558  6.97791 
## 
## 
##               Coef    S.E.   t     Pr(>|t|)
## Intercept     -0.0524 0.0162 -3.24 0.0012  
## religiousness  0.1549 0.0149 10.38 <0.0001 
## g             -0.0426 0.0151 -2.81 0.0049  
## race=Black     0.2359 0.0494  4.77 <0.0001 
## race=Hispanic  0.3365 0.0722  4.66 <0.0001 
## race=Asian     0.0142 0.1684  0.08 0.9328  
## race=Native    0.1026 0.1421  0.72 0.4703  
## age            0.0440 0.0147  2.99 0.0028  
## 
## 
## $interact
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race           age 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ g * religiousness + race + age, data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    222.56    R2       0.049    
## sigma0.9762    d.f.            8    R2 adj   0.047    
## d.f.   4446    Pr(> chi2) 0.0000    g        0.248    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.91778 -0.71686  0.05265  0.61883  7.02457 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept         -0.0453 0.0163 -2.78 0.0055  
## g                 -0.0433 0.0151 -2.86 0.0043  
## religiousness      0.1525 0.0149 10.21 <0.0001 
## race=Black         0.2470 0.0495  4.99 <0.0001 
## race=Hispanic      0.3360 0.0721  4.66 <0.0001 
## race=Asian         0.0200 0.1683  0.12 0.9055  
## race=Native        0.1143 0.1421  0.80 0.4210  
## age                0.0450 0.0147  3.06 0.0022  
## g * religiousness  0.0437 0.0141  3.11 0.0019  
## 
## 
## $g_spline
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race           age 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ rcs(g) * religiousness + race + age, 
##     data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    224.50    R2       0.049    
## sigma0.9767    d.f.           14    R2 adj   0.046    
## d.f.   4440    Pr(> chi2) 0.0000    g        0.249    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.90776 -0.71787  0.05121  0.61450  6.97383 
## 
## 
##                      Coef    S.E.   t     Pr(>|t|)
## Intercept            -0.0238 0.1184 -0.20 0.8407  
## g                    -0.0321 0.0707 -0.45 0.6499  
## g'                   -0.0613 0.3078 -0.20 0.8421  
## g''                   0.2891 1.5033  0.19 0.8475  
## g'''                 -0.4297 2.6092 -0.16 0.8692  
## religiousness         0.3081 0.1348  2.29 0.0223  
## race=Black            0.2467 0.0501  4.92 <0.0001 
## race=Hispanic         0.3340 0.0722  4.62 <0.0001 
## race=Asian            0.0243 0.1684  0.14 0.8853  
## race=Native           0.1112 0.1422  0.78 0.4343  
## age                   0.0458 0.0148  3.09 0.0020  
## g * religiousness     0.1285 0.0815  1.58 0.1149  
## g' * religiousness   -0.4275 0.3398 -1.26 0.2084  
## g'' * religiousness   1.9416 1.5934  1.22 0.2231  
## g''' * religiousness -2.7658 2.6287 -1.05 0.2928  
## 
## 
## $reli_spline
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race           age 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ g * rcs(religiousness) + race + age, 
##     data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    231.93    R2       0.051    
## sigma0.9758    d.f.           14    R2 adj   0.048    
## d.f.   4440    Pr(> chi2) 0.0000    g        0.255    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.89874 -0.72248  0.04202  0.60719  7.04277 
## 
## 
##                      Coef    S.E.   t     Pr(>|t|)
## Intercept            -0.2308 0.1188 -1.94 0.0521  
## g                    -0.2353 0.1145 -2.06 0.0399  
## religiousness         0.0536 0.0789  0.68 0.4971  
## religiousness'        0.4724 0.2820  1.68 0.0940  
## religiousness''      -3.7145 2.4873 -1.49 0.1354  
## religiousness'''      6.3026 5.2787  1.19 0.2326  
## race=Black            0.2478 0.0495  5.00 <0.0001 
## race=Hispanic         0.3354 0.0722  4.65 <0.0001 
## race=Asian            0.0243 0.1683  0.14 0.8854  
## race=Native           0.1173 0.1421  0.83 0.4092  
## age                   0.0449 0.0147  3.05 0.0023  
## g * religiousness    -0.0788 0.0765 -1.03 0.3033  
## g * religiousness'    0.4075 0.2698  1.51 0.1309  
## g * religiousness''  -2.1347 2.3325 -0.92 0.3601  
## g * religiousness'''  2.0103 4.8920  0.41 0.6811  
## 
## 
## $both_spline
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race           age 
##             7             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ rcs(g) * rcs(religiousness) + race + 
##     age, data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    239.93    R2       0.052    
## sigma0.9766    d.f.           29    R2 adj   0.046    
## d.f.   4425    Pr(> chi2) 0.0000    g        0.259    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.91741 -0.72974  0.03742  0.61557  6.99849 
## 
## 
##                         Coef      S.E.     t     Pr(>|t|)
## Intercept                  1.2654   1.2414  1.02 0.3081  
## g                          0.6766   0.7895  0.86 0.3915  
## g'                        -3.3279   3.0077 -1.11 0.2686  
## g''                       12.9625  13.4439  0.96 0.3350  
## g'''                     -16.7864  21.3243 -0.79 0.4312  
## religiousness              1.2076   0.9432  1.28 0.2005  
## religiousness'            -2.9048   2.7836 -1.04 0.2967  
## religiousness''           21.2222  21.3402  0.99 0.3200  
## religiousness'''         -38.8329  42.2663 -0.92 0.3583  
## race=Black                 0.2483   0.0501  4.95 <0.0001 
## race=Hispanic              0.3345   0.0723  4.63 <0.0001 
## race=Asian                 0.0357   0.1687  0.21 0.8324  
## race=Native                0.1144   0.1424  0.80 0.4219  
## age                        0.0463   0.0148  3.12 0.0018  
## g * religiousness          0.5951   0.6166  0.97 0.3345  
## g' * religiousness        -2.7538   2.2194 -1.24 0.2148  
## g'' * religiousness       11.3009   9.5016  1.19 0.2344  
## g''' * religiousness     -14.9732  14.3342 -1.04 0.2963  
## g * religiousness'        -1.7027   1.7532 -0.97 0.3315  
## g' * religiousness'        7.2019   6.8291  1.05 0.2917  
## g'' * religiousness'     -27.7604  31.0647 -0.89 0.3716  
## g''' * religiousness'     37.9095  50.3064  0.75 0.4511  
## g * religiousness''       13.9435  13.0212  1.07 0.2843  
## g' * religiousness''     -51.1805  54.1905 -0.94 0.3450  
## g'' * religiousness''    202.2231 257.4386  0.79 0.4322  
## g''' * religiousness''  -314.5354 435.9250 -0.72 0.4706  
## g * religiousness'''     -27.4379  25.3689 -1.08 0.2795  
## g' * religiousness'''     92.8035 109.3950  0.85 0.3963  
## g'' * religiousness'''  -384.7428 531.1933 -0.72 0.4689  
## g''' * religiousness'''  659.4486 918.9303  0.72 0.4730  
## 
## 
## $white_interact
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness           age 
##             5             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ g * religiousness + age, data = d_z_white)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    3649    LR chi2    125.88    R2       0.034    
## sigma0.9342    d.f.            4    R2 adj   0.033    
## d.f.   3644    Pr(> chi2) 0.0000    g        0.189    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.74834 -0.65230  0.09718  0.63652  6.79781 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept         -0.0427 0.0157 -2.72 0.0065  
## g                 -0.0301 0.0158 -1.91 0.0564  
## religiousness      0.1368 0.0158  8.67 <0.0001 
## age                0.0341 0.0157  2.17 0.0297  
## g * religiousness  0.0640 0.0157  4.08 <0.0001 
## 
## 
## $black_interact
## Linear Regression Model
## 
## ols(formula = fertility ~ g * religiousness + age, data = d_z_black)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     525    LR chi2     20.30    R2       0.038    
## sigma1.1791    d.f.            4    R2 adj   0.031    
## d.f.    520    Pr(> chi2) 0.0004    g        0.264    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -2.05459 -0.87755 -0.06329  0.65450  6.81865 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept          0.1053 0.0947  1.11 0.2666  
## g                 -0.0902 0.0627 -1.44 0.1505  
## religiousness      0.2000 0.0938  2.13 0.0335  
## age                0.1021 0.0497  2.05 0.0404  
## g * religiousness -0.0120 0.0654 -0.18 0.8551  
## 
## 
## $race_3interact
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race           age 
##             5             0             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ g * religiousness * race + age, data = d_z_bw)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4174    LR chi2    200.23    R2       0.047    
## sigma0.9683    d.f.            8    R2 adj   0.045    
## d.f.   4165    Pr(> chi2) 0.0000    g        0.232    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.99307 -0.67747  0.08242  0.63613  6.92578 
## 
## 
##                                Coef    S.E.   t     Pr(>|t|)
## Intercept                      -0.0427 0.0162 -2.63 0.0086  
## g                              -0.0310 0.0163 -1.90 0.0573  
## religiousness                   0.1364 0.0163  8.34 <0.0001 
## race=Black                      0.1468 0.0794  1.85 0.0646  
## age                             0.0435 0.0151  2.88 0.0041  
## g * religiousness               0.0642 0.0163  3.95 <0.0001 
## g * race=Black                 -0.0587 0.0540 -1.09 0.2768  
## religiousness * race=Black      0.0631 0.0788  0.80 0.4235  
## g * religiousness * race=Black -0.0787 0.0561 -1.40 0.1608  
## 
## 
## $interact_noage
## Frequencies of Missing Values Due to Each Variable
##     fertility             g religiousness          race 
##             7             0             0             0 
## 
## Linear Regression Model
## 
## ols(formula = fertility ~ g * religiousness + race, data = d_z)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs    4455    LR chi2    213.18    R2       0.047    
## sigma0.9771    d.f.            7    R2 adj   0.045    
## d.f.   4447    Pr(> chi2) 0.0000    g        0.242    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -1.85311 -0.70404  0.05942  0.61251  7.10581 
## 
## 
##                   Coef    S.E.   t     Pr(>|t|)
## Intercept         -0.0452 0.0163 -2.76 0.0057  
## g                 -0.0392 0.0151 -2.60 0.0094  
## religiousness      0.1537 0.0149 10.29 <0.0001 
## race=Black         0.2506 0.0496  5.06 <0.0001 
## race=Hispanic      0.3404 0.0722  4.72 <0.0001 
## race=Asian         0.0163 0.1684  0.10 0.9231  
## race=Native        0.1051 0.1422  0.74 0.4597  
## g * religiousness  0.0428 0.0141  3.04 0.0024

Jensen method

d_jensen_method = bind_cols(
  g_test_data_imp,
  fertility = d$fertility
)

fa_Jensens_method(fa_g, df = d_jensen_method, "fertility") +
  scale_y_continuous("Correlation of test with fertility") +
  scale_x_continuous("Loading on the general factor of intelligence")

## Using Pearson correlations for the criterion-indicators relationships.

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/jensen_method.png")

## `geom_smooth()` using formula = 'y ~ x'

#extrapolate to g-loading = 1
d_jensen_method_data = tibble(
  test = g_tests,
  g_loading = fa_g$loadings[, 1],
  r_fertility = map_dbl(g_test_data_imp, ~wtd.cors(d$fertility, .))
)

#linear model
(jensen_fit = ols(r_fertility ~ g_loading, data = d_jensen_method_data))

## Linear Regression Model
## 
## ols(formula = r_fertility ~ g_loading, data = d_jensen_method_data)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs      19    LR chi2     14.27    R2       0.528    
## sigma0.0279    d.f.            1    R2 adj   0.500    
## d.f.     17    Pr(> chi2) 0.0002    g        0.034    
## 
## Residuals
## 
##       Min        1Q    Median        3Q       Max 
## -0.046215 -0.021412  0.002139  0.018504  0.051269 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept  0.0380 0.0247  1.54 0.1427  
## g_loading -0.1669 0.0383 -4.36 0.0004

#slope + intercept
coef(jensen_fit)[1] + coef(jensen_fit)[2]

## Intercept 
##    -0.129

Vietnam Experience Study: Religiousness and intelligence

Init

Functions

Data

Analyses

Descriptives

Religiousness

Intelligence

Main results

Count variable models

Alternative fertility variable

Full model outputs

Jensen method

Meta