Init

options(digits = 2)


#remotes::install_github("tidyverse/haven")
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  haven,
  rms,
  mirt,
  ggeffects
  )
## Loading required package: stats4
## Loading required package: lattice
theme_set(theme_bw())

Data

#https://gssdataexplorer.norc.org/pages/show?page=gss%2Fgss_data
#2018, release 3
#d = read_sav("data/GSS_spss/GSS7218_R3.sav")
#2023
d = read_stata("data/GSS_stata/gss7221_r3a.dta")
d_vars = df_var_table(d)

Recode

#social gap due to low genetic intelligence
d$racdif2 %>% as_factor() %>% table2(include_NA = F)
d$race_gap = (case_when(
  d$racdif2 == 1 ~ T,
  d$racdif2 == 2 ~ F,
  T ~ NA
))

#race
d$race %>% as_factor() %>% table2(include_NA = F)
d$race_3way = d$race %>% as_factor()

#newer one
#we have to fix though
d$racecen1 %>% as_factor() %>% table2(include_NA = F)
d$race_multi = d$racecen1 %>% as_factor() %>% fct_lump_min(min = 300, other_level = "other") %>% as.character()
d$race_multi %>% table2(include_NA = F)
#recode multirace
d$race_multi[!is.na(d$racecen2) & ((d$racecen2 %>% as_factor() %>% as.character()) != (d$race_multi %>% as_factor() %>% as.character()))] = "mixed"

#clean names
d$race_multi %<>% mapvalues(from = c("iap", "black or african american", "american indian or alaska native"), to = c(NA, "black", "native american")) %>% str_to_title() %>% as.factor() %>% fct_relevel("White")

d$race_multi %>% table2(include_NA = F)
#sex
d %<>% mutate(
  sex = sex %>% as_factor()
)

#year and age
d$year %<>% as.numeric()
d$age %<>% as.numeric()

#BMI
d %<>% mutate(
  height_cm = height * 2.54,
  weight_kg = weight / 2.20462,
  BMI = weight_kg / ((height_cm/100)^2)
)

#politics ideology
d %<>% mutate(
  pol_ideo_self = polviews %>% as_factor(),
  pol_ideo_self_num = pol_ideo_self %>% as.numeric() %>% subtract(1),
  pol_ideo_self_clean = pol_ideo_self %>% mapvalues(from = c("no answer", "skipped on web", "iap", "moderate, middle of the road"), to = c(NA, NA, NA, "moderate")) %>% fct_drop(),
  pol_ideo_self_clean_4way = pol_ideo_self_clean %>% as.character() %>% str_replace("extremely ", "") %>% str_replace("slightly ", "") %>% factor(levels = c("liberal", "moderate", "conservative", "don't know"))
)

#politics party
d %<>% mutate(
  party = partyid %>% as_factor() %>% mapvalues(from = c("other party", "no answer", "don't know"), to = c(NA, NA, NA)) %>% mapvalues(from = c("not very strong democrat", "independent, close to democrat", "independent (neither, no response)", "independent, close to republican", "not very strong republican"), to = c("democrat", "weak democrat", "independent", "weak republican", "republican")) %>% ordered(levels = c("strong democrat", "democrat", "weak democrat", "independent", "weak republican", "republican", "strong republican")),
  
  party_simple = party %>% mapvalues(from = c("strong democrat", "weak democrat", "strong republican", "weak republican"), to = c("democrat", "democrat", "republican", "republican")) %>% factor(levels = c("democrat", "independent", "republican"))
)

d$party %>% table2(sort_descending = NULL)
d$party_simple %>% table2(sort_descending = NULL)
#education degrees and IQ
d %<>% mutate(
  degree = degree %>% as_factor(),
  wordsum = wordsum %>% as.numeric(),
  wordsum_z = standardize(wordsum),
  wordsum_zw = standardize(wordsum, focal_group = (race_3way == "WHITE"))
)

d$other %>% table2()
#fertility
d %<>% mutate(
  fertility = childs %>% as.numeric() %>% mapvalues(from = 9, to = NA, warn_missing = F)
)

#religion
d %<>% mutate(
  mormon = other %in% c(59:62, 64),
  religion_group = relig %>% as_factor() %>% mapvalues(from = c("dk, na, iap"), to = rep(NA, 1)) %>% fct_lump_min(min = 150, other_level = "other") %>% fct_relevel("none"),
  any_religion = (religion_group != "none"),
  jewish = (religion_group == "Jewish")
)

d$religion_group %>% table2()
d$religion_group %>% levels()
##  [1] "none"                    "protestant"             
##  [3] "catholic"                "jewish"                 
##  [5] "buddhism"                "muslim/islam"           
##  [7] "orthodox-christian"      "christian"              
##  [9] "inter-nondenominational" "no answer"              
## [11] "other"
#sexual orientation
d %<>% mutate(
  sexual_orientation = sexornt %>% as_factor(),
  sexual_orientation_combo = sex + ", " + sexual_orientation
)

Analyses

Score WORDSUM

#wordsum items
wordsum_items = d %>% select(worda:wordj)

#IRT fit
wordsum_irt = mirt(
  wordsum_items %>% map_df(as.numeric)
)
## Warning: data contains response patterns with only NAs
## 
Iteration: 1, Log-Lik: -134887.665, Max-Change: 0.87002
Iteration: 2, Log-Lik: -131402.228, Max-Change: 0.39313
Iteration: 3, Log-Lik: -130553.170, Max-Change: 0.19675
Iteration: 4, Log-Lik: -130394.995, Max-Change: 0.10344
Iteration: 5, Log-Lik: -130351.923, Max-Change: 0.07175
Iteration: 6, Log-Lik: -130337.927, Max-Change: 0.04834
Iteration: 7, Log-Lik: -130333.998, Max-Change: 0.03477
Iteration: 8, Log-Lik: -130332.246, Max-Change: 0.01703
Iteration: 9, Log-Lik: -130330.988, Max-Change: 0.01027
Iteration: 10, Log-Lik: -130330.445, Max-Change: 0.00628
Iteration: 11, Log-Lik: -130330.213, Max-Change: 0.00309
Iteration: 12, Log-Lik: -130330.103, Max-Change: 0.00243
Iteration: 13, Log-Lik: -130330.046, Max-Change: 0.00189
Iteration: 14, Log-Lik: -130330.020, Max-Change: 0.00100
Iteration: 15, Log-Lik: -130330.009, Max-Change: 0.00051
Iteration: 16, Log-Lik: -130330.005, Max-Change: 0.00043
Iteration: 17, Log-Lik: -130330.003, Max-Change: 0.00028
Iteration: 18, Log-Lik: -130330.003, Max-Change: 0.00024
Iteration: 19, Log-Lik: -130330.002, Max-Change: 0.00019
Iteration: 20, Log-Lik: -130330.002, Max-Change: 0.00018
Iteration: 21, Log-Lik: -130330.001, Max-Change: 0.00015
Iteration: 22, Log-Lik: -130330.001, Max-Change: 0.00014
Iteration: 23, Log-Lik: -130330.001, Max-Change: 0.00014
Iteration: 24, Log-Lik: -130330.001, Max-Change: 0.00013
Iteration: 25, Log-Lik: -130330.000, Max-Change: 0.00014
Iteration: 26, Log-Lik: -130330.000, Max-Change: 0.00013
Iteration: 27, Log-Lik: -130330.000, Max-Change: 0.00013
Iteration: 28, Log-Lik: -130330.000, Max-Change: 0.00012
Iteration: 29, Log-Lik: -130330.000, Max-Change: 0.00012
Iteration: 30, Log-Lik: -130330.000, Max-Change: 0.00011
Iteration: 31, Log-Lik: -130329.999, Max-Change: 0.00011
Iteration: 32, Log-Lik: -130329.999, Max-Change: 0.00010
Iteration: 33, Log-Lik: -130329.999, Max-Change: 0.00011
Iteration: 34, Log-Lik: -130329.999, Max-Change: 0.00010
#scores
wordsum_irt_scores = fscores(wordsum_irt)

#save the white standard scores
d$wordsum_g = wordsum_irt_scores[, 1] %>% standardize(focal_group = d$race_multi == "White")
## Warning in standardize(., focal_group = d$race_multi == "White"): `focal_group`
## contains `NA` values. These were converted to `FALSE` following tidyverse
## convention.
d$wordsum_IQ = d$wordsum_g*15 + 100

Ideology, voting and IQ

#plot by politics and year
d %>% 
  filter(!is.na(pol_ideo_self_clean)) %>% 
  ggplot(aes(year, wordsum_IQ, color = pol_ideo_self_clean)) +
  geom_smooth(method = "loess") +
  labs(y = "Wordsum IQ", 
       color = "Self-id political ideology", 
       title = "IQ by self-id political idelogy", 
       subtitle = "All races")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 31258 rows containing non-finite values (`stat_smooth()`).

GG_save("figs/IQ by year and ideology.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 31258 rows containing non-finite values (`stat_smooth()`).
#without year
d %>% 
  filter(!is.na(pol_ideo_self_clean)) %>% 
  GG_group_means("wordsum_IQ", "pol_ideo_self_clean", type = "point") +
  labs(y = "Wordsum IQ", 
       x = "Self-id political ideology",
       title = "IQ by self-id political idelogy", 
       subtitle = "All races")
## Missing values were removed.

GG_save("figs/IQ by ideology.png")

#simplified
d %>% 
  filter(!is.na(pol_ideo_self_clean_4way)) %>% 
  GG_group_means("wordsum_IQ", "pol_ideo_self_clean_4way", type = "point") +
  labs(y = "Wordsum IQ",
       x = "Self-id political ideology, simplified",
       title = "IQ by self-id political idelogy, simplified", 
       subtitle = "All races")
## Missing values were removed.

GG_save("figs/IQ by ideology 4way.png")

#party by year and party
d %>% 
  filter(!is.na(party)) %>% 
  ggplot(aes(year, wordsum_IQ, color = party)) +
  geom_smooth(method = "loess") +
  labs(y = "Wordsum IQ", 
       color = "Self-id party",
       title = "IQ by self-id political party", 
       subtitle = "All races")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 35903 rows containing non-finite values (`stat_smooth()`).

GG_save("figs/IQ by year and party.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 35903 rows containing non-finite values (`stat_smooth()`).
#without time
d %>% 
  filter(!is.na(party)) %>% 
  GG_group_means("wordsum_IQ", "party", type = "point") +
  labs(y = "Wordsum IQ", 
       x = "Self-id party",
       title = "IQ by self-id political party", 
       subtitle = "All races")
## Missing values were removed.

GG_save("figs/IQ by party.png")

#simplified
d %>% 
  filter(!is.na(party_simple)) %>% 
  GG_group_means("wordsum_IQ", "party_simple", type = "point") +
  labs(y = "Wordsum IQ",
       x = "Self-id political ideology, simplified",
       title = "IQ by self-id political party, simplified", 
       subtitle = "All races")
## Missing values were removed.

GG_save("figs/IQ by party simple.png")

#Whites only
d_whites = d %>% filter(
  race_multi == "White"
)

#plot by politics and year
d_whites %>% 
  filter(!is.na(pol_ideo_self_clean)) %>% 
  ggplot(aes(year, wordsum_IQ, color = pol_ideo_self_clean)) +
  geom_smooth(method = "loess") +
  labs(y = "Wordsum IQ", 
       color = "Self-id political ideology", 
       title = "IQ by self-id political idelogy", 
       subtitle = "Whites only")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11735 rows containing non-finite values (`stat_smooth()`).

GG_save("figs/IQ by year and ideology whites.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11735 rows containing non-finite values (`stat_smooth()`).
#without year
d_whites %>% 
  filter(!is.na(pol_ideo_self_clean)) %>% 
  GG_group_means("wordsum_IQ", "pol_ideo_self_clean", type = "point") +
  labs(y = "Wordsum IQ", 
       x = "Self-id political ideology",
       title = "IQ by self-id political idelogy", 
       subtitle = "Whites only")
## Missing values were removed.

GG_save("figs/IQ by ideology whites.png")

describe2(d_whites$wordsum_IQ, d_whites$pol_ideo_self_clean)
## New names:
## • `` -> `...1`
#simplified
d_whites %>% 
  filter(!is.na(pol_ideo_self_clean_4way)) %>% 
  GG_group_means("wordsum_IQ", "pol_ideo_self_clean_4way", type = "point") +
  labs(y = "Wordsum IQ",
       x = "Self-id political ideology, simplified",
       title = "IQ by self-id political idelogy, simplified", 
       subtitle = "Whites only")
## Missing values were removed.

GG_save("figs/IQ by ideology 4way whites.png")

describe2(d_whites$wordsum_IQ, d_whites$pol_ideo_self_clean_4way)
## New names:
## • `` -> `...1`
#party by year and party
d_whites %>% 
  filter(!is.na(party)) %>% 
  ggplot(aes(year, wordsum_IQ, color = party)) +
  geom_smooth(method = "loess") +
  labs(y = "Wordsum IQ", 
       color = "Self-id party", 
       title = "IQ by self-id political party", 
       subtitle = "Whites only")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 12491 rows containing non-finite values (`stat_smooth()`).

GG_save("figs/IQ by year and party whites.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 12491 rows containing non-finite values (`stat_smooth()`).
#without time
d_whites %>% 
  filter(!is.na(party)) %>% 
  GG_group_means("wordsum_IQ", "party", type = "point") +
  labs(y = "Wordsum IQ", 
       x = "Self-id party",
       title = "IQ by self-id political party", 
       subtitle = "Whites only")
## Missing values were removed.

GG_save("figs/IQ by party whites.png")

#simplified
d_whites %>% 
  filter(!is.na(party_simple)) %>% 
  GG_group_means("wordsum_IQ", "party_simple", type = "point") +
  labs(y = "Wordsum IQ",
       x = "Self-id political ideology, simplified",
       title = "IQ by self-id political party, simplified", 
       subtitle = "d_whites")
## Missing values were removed.

GG_save("figs/IQ by party simple whites.png")

Meta

#versions
write_sessioninfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggeffects_1.2.1       mirt_1.38.1           lattice_0.20-45      
##  [4] rms_6.6-0             haven_2.5.2           kirkegaard_2023-05-01
##  [7] psych_2.3.3           assertthat_0.2.1      weights_1.0.4        
## [10] Hmisc_5.0-1           magrittr_2.0.3        lubridate_1.9.2      
## [13] forcats_1.0.0         stringr_1.5.0         dplyr_1.1.2          
## [16] purrr_1.0.1           readr_2.1.4           tidyr_1.3.0          
## [19] tibble_3.2.1          ggplot2_3.4.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1         pbapply_1.7-0        gridExtra_2.3       
##  [4] permute_0.9-7        sandwich_3.0-2       rlang_1.1.0         
##  [7] multcomp_1.4-23      polspline_1.1.22     compiler_4.3.0      
## [10] mgcv_1.8-42          gdata_2.18.0.1       systemfonts_1.0.4   
## [13] vctrs_0.6.2          quantreg_5.95        rvest_1.0.3         
## [16] pkgconfig_2.0.3      fastmap_1.1.1        backports_1.4.1     
## [19] labeling_0.4.2       utf8_1.2.3           rmarkdown_2.21      
## [22] tzdb_0.3.0           nloptr_2.0.3         ragg_1.2.5          
## [25] MatrixModels_0.5-1   xfun_0.39            cachem_1.0.7        
## [28] jsonlite_1.8.4       highr_0.10           Deriv_4.1.3         
## [31] broom_1.0.4          parallel_4.3.0       cluster_2.1.4       
## [34] R6_2.5.1             bslib_0.4.2          stringi_1.7.12      
## [37] boot_1.3-28          rpart_4.1.19         jquerylib_0.1.4     
## [40] Rcpp_1.0.10          knitr_1.42           zoo_1.8-12          
## [43] base64enc_0.1-3      Matrix_1.5-1         splines_4.3.0       
## [46] nnet_7.3-18          timechange_0.2.0     tidyselect_1.2.0    
## [49] rstudioapi_0.14      yaml_2.3.7           vegan_2.6-4         
## [52] codetools_0.2-19     dcurver_0.9.2        plyr_1.8.8          
## [55] withr_2.5.0          evaluate_0.20        foreign_0.8-82      
## [58] survival_3.5-3       xml2_1.3.4           pillar_1.9.0        
## [61] mice_3.15.0          checkmate_2.2.0      generics_0.1.3      
## [64] hms_1.1.3            munsell_0.5.0        scales_1.2.1        
## [67] minqa_1.2.5          gtools_3.9.4         glue_1.6.2          
## [70] tools_4.3.0          data.table_1.14.8    lme4_1.1-33         
## [73] SparseM_1.81         webshot_0.5.4        mvtnorm_1.1-3       
## [76] grid_4.3.0           colorspace_2.1-0     nlme_3.1-162        
## [79] htmlTable_2.4.1      Formula_1.2-5        cli_3.6.1           
## [82] textshaping_0.3.6    kableExtra_1.3.4     fansi_1.0.4         
## [85] viridisLite_0.4.1    svglite_2.1.1        gtable_0.3.3        
## [88] sass_0.4.5           digest_0.6.31        GPArotation_2023.3-1
## [91] TH.data_1.1-2        farver_2.1.1         htmlwidgets_1.6.2   
## [94] htmltools_0.5.5      lifecycle_1.0.3      httr_1.4.5          
## [97] MASS_7.3-58.3
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}