Init

options(
  digits = 2,
  contrasts=c("contr.treatment", "contr.treatment"),
  pillar.print_max = 1000
  )

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  haven, 
  rms,
  mirt
  )
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Loading required package: stats4
theme_set(theme_bw())

Data

#https://gssdataexplorer.norc.org/pages/show?page=gss%2Fgss_data
#2018, release 3
d = read_sav("data/GSS_spss/GSS7218_R3.sav")
# d_vars = df_var_table(d)

Functions

#wave white standardize
do_cwz = function(variable, data, whites = T) {
  plyr::ddply(data, "YEAR", function(dd) {
    #white subset
    # browser()
    if (whites) {
      tibble(
        var = dd[[variable]] %>% standardize(focal_group = dd$race_3way == "WHITE")
      ) %>% return()
    }
    
    #everybody
    tibble(
      var = dd[[variable]] %>% standardize()
    ) %>% return()
  }) %>% pull(var)
}

Recode

#race simple
d %<>% mutate(
  race_3way = RACE %>% as_factor() %>% fct_relevel("WHITE") %>% fct_drop()
)

d$race_3way %>% table2(include_NA = F)
#sex, age, year
d %<>% mutate(
  SEX = SEX %>% as_factor(),
  YEAR = YEAR %>% as.numeric(),
  AGE = AGE %>% as.numeric(),
  height_cm = HEIGHT * 2.54,
  weight_kg = WEIGHT * 0.453592,
  BMI = weight_kg / (height_cm/100)^2
)


#politics
d %<>% mutate(
  pol_ideo_self = POLVIEWS %>% as_factor() %>% mapvalues(from = c("DK", "NA", "IAP"), to = rep(NA, 3)),
  pol_ideo_self_num = pol_ideo_self %>% as.numeric() %>% subtract(1) %>% rescale(new_min = 0, new_max = 1)
)

d$pol_ideo_self_num %>% describe2()
#education degrees and IQ
d %<>% mutate(
  WORDSUM = WORDSUM %>% as.numeric(),
  wordsum_z = standardize(WORDSUM),
  wordsum_wz = standardize(WORDSUM, focal_group = (race_3way == "WHITE")),
  wordsum_cwz = do_cwz(variable = "WORDSUM", whites = T, data = .)
)

#wordsum g
fa_wordsum = mirt::mirt(
  d %>% select(WORDA:WORDH) %>% map_df(as.numeric),
  model = 1
)
## Warning: data contains response patterns with only NAs
## 
Iteration: 1, Log-Lik: -104263.197, Max-Change: 0.71642
Iteration: 2, Log-Lik: -101244.504, Max-Change: 0.37105
Iteration: 3, Log-Lik: -100582.695, Max-Change: 0.18657
Iteration: 4, Log-Lik: -100460.004, Max-Change: 0.14463
Iteration: 5, Log-Lik: -100414.350, Max-Change: 0.07458
Iteration: 6, Log-Lik: -100396.651, Max-Change: 0.09150
Iteration: 7, Log-Lik: -100388.716, Max-Change: 0.04054
Iteration: 8, Log-Lik: -100385.023, Max-Change: 0.02814
Iteration: 9, Log-Lik: -100383.328, Max-Change: 0.01823
Iteration: 10, Log-Lik: -100382.175, Max-Change: 0.00372
Iteration: 11, Log-Lik: -100382.107, Max-Change: 0.00255
Iteration: 12, Log-Lik: -100382.069, Max-Change: 0.00280
Iteration: 13, Log-Lik: -100382.045, Max-Change: 0.00255
Iteration: 14, Log-Lik: -100382.024, Max-Change: 0.00107
Iteration: 15, Log-Lik: -100382.017, Max-Change: 0.00048
Iteration: 16, Log-Lik: -100382.016, Max-Change: 0.00032
Iteration: 17, Log-Lik: -100382.016, Max-Change: 0.00023
Iteration: 18, Log-Lik: -100382.015, Max-Change: 0.00022
Iteration: 19, Log-Lik: -100382.014, Max-Change: 0.00014
Iteration: 20, Log-Lik: -100382.014, Max-Change: 0.00013
Iteration: 21, Log-Lik: -100382.014, Max-Change: 0.00048
Iteration: 22, Log-Lik: -100382.014, Max-Change: 0.00015
Iteration: 23, Log-Lik: -100382.014, Max-Change: 0.00012
Iteration: 24, Log-Lik: -100382.014, Max-Change: 0.00011
Iteration: 25, Log-Lik: -100382.014, Max-Change: 0.00002
#details
fa_wordsum
## 
## Call:
## mirt::mirt(data = d %>% select(WORDA:WORDH) %>% map_df(as.numeric), 
##     model = 1)
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 25 EM iterations.
## mirt version: 1.36.1 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -1e+05
## Estimated parameters: 16 
## AIC = 2e+05
## BIC = 2e+05; SABIC = 2e+05
fa_wordsum %>% summary
##          F1    h2
## WORDA 0.495 0.245
## WORDB 0.796 0.633
## WORDC 0.559 0.313
## WORDD 0.806 0.649
## WORDE 0.725 0.526
## WORDF 0.729 0.532
## WORDG 0.479 0.229
## WORDH 0.623 0.388
## 
## SS loadings:  3.5 
## Proportion Var:  0.44 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#scores
fa_wordsum_scores = mirt::fscores(
  fa_wordsum,
  full.scores = TRUE, 
  full.scores.SE = TRUE
)

#reliability
mirt::marginal_rxx(fa_wordsum)
## [1] 0.63
# mirt::empirical_rxx(fa_wordsum_scores) #error

#save scores
d$g = fa_wordsum_scores[, 1] %>% standardize(focal_group = (d$race_3way == "WHITE"))

#without cohort changes, cohort, white standardized
d$g_cwz = do_cwz(variable = "g", whites = T, data = d)

#simple and mirt scores
GG_scatter(d, "g", "WORDSUM")
## `geom_smooth()` using formula 'y ~ x'

GG_scatter(d, "g_cwz", "wordsum_cwz")
## `geom_smooth()` using formula 'y ~ x'

#sum stats
describeBy(
  d$g,
  d$race_3way
)
## 
##  Descriptive statistics by group 
## group: WHITE
##    vars     n mean sd median trimmed  mad  min max range  skew kurtosis   se
## X1    1 25489    0  1      0    0.03 0.97 -3.4 1.7   5.1 -0.35     0.06 0.01
## ------------------------------------------------------------ 
## group: BLACK
##    vars    n  mean sd median trimmed  mad  min max range  skew kurtosis   se
## X1    1 4690 -0.63  1  -0.47   -0.59 0.99 -3.4 1.7   5.1 -0.33    -0.21 0.01
## ------------------------------------------------------------ 
## group: OTHER
##    vars    n  mean  sd median trimmed mad  min max range  skew kurtosis   se
## X1    1 1791 -0.68 1.1   -0.6   -0.66 1.2 -3.4 1.7   5.1 -0.21    -0.51 0.03
miss_count(d$g, reverse = T)
## [1] 31970
#fertility
d %<>% mutate(
  fertility = CHILDS %>% as.numeric() %>% mapvalues(from = 9, to = NA, warn_missing = F),
)

#standardized within cohorts, and for sex (since we need the age interaction)
d$fertility_csz = plyr::ddply(d, "YEAR", function(dd) {
  #remove age within this survey year
  fit = ols(fertility ~ rcs(AGE) * SEX, data = dd)
  
  tibble(
    fert = resid(fit) %>% standardize()
  )
}) %>% pull(fert)

#what does it look like?
d$fertility_csz %>% describe2()
d$fertility_csz %>% GG_denhist()
## Warning: Removed 412 rows containing non-finite values (stat_bin).
## Warning: Removed 412 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 412 rows containing non-finite values (stat_bin).
## Removed 412 rows containing non-finite values (stat_density).

#religion
d$OTHER %>% as_factor() %>% table2()
d$RELIG %>% as_factor() %>% table2()
d$attend_service = d$ATTEND %>% as_factor() %>% fct_drop() %>% as.ordered()
d$attend_service %>% table2()
d$attend_service_num = d$attend_service %>% as.numeric() %>% rescale(new_min = 0, new_max = 1)

#recode mormons
d %<>% mutate(
  mormon = case_when(
    (OTHER %in% c(59:62, 64)) ~ T,
    TRUE ~ F
  ),
  religion_group = RELIG %>% as_factor() %>% mapvalues(from = c("DK", "NA"), to = rep(NA, 2)) %>% fct_lump_min(min = 150, other_level = "OTHER") %>% fct_relevel("NONE"),
  any_religion = (religion_group != "NONE")
)

d$mormon %>% table2()
d$religion_group %>% table2()
d$religion_group %>% levels()
## [1] "NONE"         "PROTESTANT"   "CATHOLIC"     "JEWISH"       "BUDDHISM"    
## [6] "MOSLEM/ISLAM" "CHRISTIAN"    "OTHER"
#old people, with completed fertility
d %<>% mutate(
  complete_fert = (AGE >= 45 & SEX == "MALE") | (AGE >= 40 & SEX != "MALE")
)
d$complete_fert %>% table2()
#various subsets
d_c = d %>% filter(complete_fert)
d_cw = d %>% filter(complete_fert, race_3way == "WHITE")
d_cm = d %>% filter(complete_fert, mormon)
d_cwm = d %>% filter(complete_fert, race_3way == "WHITE", mormon)
d_w = d %>% filter(race_3way == "WHITE")
d_wm = d %>% filter(race_3way == "WHITE", mormon)
d_m = d %>% filter(mormon)

Analysis

Step-wise function

Define a function to run the regression for a given variable. This makes coding a lot easier and less error prone.

run_regs = function(x, data, y = "fertility") {
  #make models
  m1 = str_glue("{y} ~ g_cwz + {x} * g_cwz")
  m2 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX")
  m3 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX * rcs(AGE)")
  m4 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX * rcs(AGE) + rcs(YEAR)")
  m5 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX * rcs(AGE) * rcs(YEAR)")
  m6 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way")
  m7 = str_glue("{y} ~ g_cwz + {x} * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + {x} * rcs(AGE)")
  
  list(
    ols(as.formula(m1), data = data),
    ols(as.formula(m2), data = data),
    ols(as.formula(m3), data = data),
    ols(as.formula(m4), data = data),
    ols(as.formula(m5), data = data),
    ols(as.formula(m6), data = data),
    ols(as.formula(m7), data = data)
  )
}

run_regs_summary = function(x, data, y = "fertility") {
  run_regs(x = x, data = data, y = y) %>% 
    summarize_models(asterisks_only = F)
}

#without cohort and age controls
run_regs2 = function(x, data, y = "fertility_csz") {
  #make models
  m1 = str_glue("{y} ~ g_cwz + {x} * g_cwz")
  m2 = str_glue("{y} ~ g_cwz + {x} * g_cwz + race_3way")
  m3 = str_glue("{y} ~ g_cwz + {x} * g_cwz + race_3way + {x} * rcs(AGE)")
  
  list(
    ols(as.formula(m1), data = data),
    ols(as.formula(m2), data = data),
    ols(as.formula(m3), data = data)
  )
}

run_regs2_summary = function(x, data, y = "fertility_csz") {
  run_regs2(x = x, data = data, y = y) %>% 
    summarize_models(asterisks_only = F)
}

Step-wise results

#mormons
run_regs_summary("mormon", d)
run_regs_summary("mormon", d_c)
#any religion
run_regs_summary("any_religion", d)
run_regs_summary("any_religion", d_c)
#major groups
run_regs_summary("religion_group", d)
run_regs_summary("religion_group", d_c)

Cohort standardized values

#mormons
run_regs2_summary("mormon", d)
run_regs2_summary("mormon", d_c)
#any religion
run_regs2_summary("any_religion", d)
run_regs2_summary("any_religion", d_c)
#major groups
run_regs2_summary("religion_group", d)
run_regs2_summary("religion_group", d_c)

Race

run_regs_summary("race_3way", d)
run_regs_summary("race_3way", d_c)

Politics

run_regs_summary("pol_ideo_self", d)
run_regs_summary("pol_ideo_self", d_c)
run_regs_summary("pol_ideo_self_num", d)
run_regs_summary("pol_ideo_self_num", d_c)
#cohort standardized
run_regs2_summary("pol_ideo_self", d)
run_regs2_summary("pol_ideo_self", d_c)
run_regs2_summary("pol_ideo_self_num", d)
run_regs2_summary("pol_ideo_self_num", d_c)

Additional models

Reviewer requests

#R2 requested this
#model with all predictors together, main effects
list(
  #standard coding
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service * g_cwz + pol_ideo_self_num * g_cwz, data = d),

  #religious service as numerical (higher power, less sensible)
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service_num * g_cwz + pol_ideo_self_num * g_cwz, data = d)
) %>% summarize_models(asterisks_only = F, collapse_nonlinear = T, add_ref_level = F)
#model with all predictors together, interaction effect for polviews x mormon
list(
  #standard coding
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service * g_cwz + pol_ideo_self_num * g_cwz + pol_ideo_self_num * mormon, data = d),

  #religious service as numerical (higher power, less sensible)
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service_num * g_cwz + pol_ideo_self_num * g_cwz + pol_ideo_self_num * mormon, data = d)
) %>% summarize_models(asterisks_only = F, collapse_nonlinear = T, add_ref_level = F)
#model with all predictors together, interaction effect for polviews x rel attendance
list(
  #standard coding
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service * g_cwz + pol_ideo_self_num * g_cwz + pol_ideo_self_num * attend_service, data = d),

  #religious service as numerical (higher power, less sensible)
  ols(fertility ~ g_cwz + mormon * g_cwz + SEX * rcs(AGE) * rcs(YEAR) + race_3way + attend_service_num * g_cwz + pol_ideo_self_num * g_cwz + pol_ideo_self_num * attend_service_num, data = d)
) %>% summarize_models(asterisks_only = F, collapse_nonlinear = T, add_ref_level = F)

Meta

write_sessioninfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] mirt_1.36.1           rms_6.3-0             SparseM_1.81         
##  [4] haven_2.5.0           kirkegaard_2022-05-06 psych_2.2.3          
##  [7] assertthat_0.2.1      weights_1.0.4         Hmisc_4.7-0          
## [10] Formula_1.2-4         survival_3.2-13       lattice_0.20-45      
## [13] magrittr_2.0.3        forcats_0.5.1         stringr_1.4.0        
## [16] dplyr_1.0.9           purrr_0.3.4           readr_2.1.2          
## [19] tidyr_1.2.0           tibble_3.1.7          ggplot2_3.3.6        
## [22] tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##   [1] TH.data_1.1-1        minqa_1.2.4          colorspace_2.0-3    
##   [4] ellipsis_0.3.2       htmlTable_2.4.0      base64enc_0.1-3     
##   [7] fs_1.5.2             rstudioapi_0.13      mice_3.14.0         
##  [10] farver_2.1.0         Deriv_4.1.3          MatrixModels_0.5-0  
##  [13] fansi_1.0.3          mvtnorm_1.1-3        lubridate_1.8.0     
##  [16] xml2_1.3.3           codetools_0.2-18     splines_4.2.0       
##  [19] mnormt_2.0.2         knitr_1.39           jsonlite_1.8.0      
##  [22] nloptr_2.0.1         broom_0.8.0          cluster_2.1.3       
##  [25] dbplyr_2.1.1         png_0.1-7            compiler_4.2.0      
##  [28] httr_1.4.3           backports_1.4.1      Matrix_1.4-1        
##  [31] fastmap_1.1.0        cli_3.3.0            htmltools_0.5.2     
##  [34] quantreg_5.93        tools_4.2.0          gtable_0.3.0        
##  [37] glue_1.6.2           Rcpp_1.0.8.3         cellranger_1.1.0    
##  [40] jquerylib_0.1.4      vctrs_0.4.1          gdata_2.18.0        
##  [43] nlme_3.1-157         xfun_0.30            lme4_1.1-29         
##  [46] rvest_1.0.2          lifecycle_1.0.1      dcurver_0.9.2       
##  [49] gtools_3.9.2         polspline_1.1.20     MASS_7.3-57         
##  [52] zoo_1.8-10           scales_1.2.0         hms_1.1.1           
##  [55] parallel_4.2.0       sandwich_3.0-1       RColorBrewer_1.1-3  
##  [58] yaml_2.3.5           pbapply_1.5-0        gridExtra_2.3       
##  [61] sass_0.4.1           rpart_4.1.16         latticeExtra_0.6-29 
##  [64] stringi_1.7.6        highr_0.9            permute_0.9-7       
##  [67] checkmate_2.1.0      boot_1.3-28          rlang_1.0.2         
##  [70] pkgconfig_2.0.3      evaluate_0.15        labeling_0.4.2      
##  [73] htmlwidgets_1.5.4    tidyselect_1.1.2     plyr_1.8.7          
##  [76] R6_2.5.1             generics_0.1.2       multcomp_1.4-19     
##  [79] DBI_1.1.2            mgcv_1.8-40          pillar_1.7.0        
##  [82] foreign_0.8-82       withr_2.5.0          nnet_7.3-17         
##  [85] modelr_0.1.8         crayon_1.5.1         utf8_1.2.2          
##  [88] tmvnsim_1.0-2        tzdb_0.3.0           rmarkdown_2.14      
##  [91] jpeg_0.1-9           grid_4.2.0           readxl_1.4.0        
##  [94] data.table_1.14.2    vegan_2.6-2          reprex_2.0.1        
##  [97] digest_0.6.29        GPArotation_2022.4-1 munsell_0.5.0       
## [100] bslib_0.3.1
#upload to OSF
if (F) {
  library(osfr)
  
  #auth
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/czs48/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(osf_proj, conflicts = "overwrite", path = c(
    "data",
    "notebook.Rmd", "notebook.html", "sessions_info.txt"
  ))
}