Init

#global options
options(
  digits = 2,
  contrasts = c("contr.treatment", "contr.treatment")
)

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
    rms,
    mirt,
    googlesheets4,
    patchwork
)

## Loading required package: stats4
## Loading required package: lattice

theme_set(theme_bw())

options(
    digits = 3,
    scipen = 999
)

Functions

#code likert to numbers
recode_likert_taboo = function(x) {
  mapvalues(x, from = c("Not at all taboo", "A little taboo", "Somewhat taboo", "Very taboo", "Extremely taboo"), to = 1:5) %>% as.numeric()
}

#recode nonconsent to NA
recode_nonconsent = function(x) {
  mapvalues(x, from = "CONSENT_REVOKED", to = NA, warn_missing = F)
}

#likert
recode_likert = function(x) {
  mapvalues(x, from = c("Totally disagree", "Somewhat disagree", "Neutral", "Somewhat agree", "Totally agree"), to = c(1:5)) %>% as.numeric()
}

Data

#for privacy reasons, we only do this once, and then reuse the merged file
if (F) {
  #prolific
  prolific_meta = read_csv("data_sensitive/prolific_export_62bfea38712944a2d934e957.csv") %>% 
    df_legalize_names()
  
  prolific_data = read_csv("data_sensitive/20220830035106-SurveyExport.csv") %>% 
    df_legalize_names()
  
  #join to overlap (we get rid of test data this way too)
  d = left_join(
    prolific_meta,
    prolific_data %>% filter(Status == "Complete"),
    by = c("Participant_id" = "Please_enter_your_Prolific_ID")
  )
  
  #no dups
  assert_that(!anyDuplicated(d$Participant_id))
  
  #remove personal identifiers
  d %>% 
    select(
      -Submission_id,
      -Participant_id,
      -Referer,
      -SessionID,
      -IP_Address,
      -User_Agent
    ) %>%
    write_rds("data/prolific_merged.rds")  
}

d = read_rds("data/prolific_merged.rds")

#fields metadata
d %<>% mutate(
  science_one_way_knowing = Science_is_just_one_way_of_knowing_no_more_valid_or_accurate_than_other_approaches_to_knowledge %>% recode_nonconsent() %>% recode_likert()
)

Attention checks

attention_check_vars = c("Set_the_slider_to_not_scientific",
                         "Set_the_slider_in_the_middle")

d %<>% mutate(
  attention_check_1 = !Set_the_slider_to_not_scientific <= 10,
  attention_check_2 = !is_between(Set_the_slider_in_the_middle, a = 40, b = 60),
  attention_check_fails = attention_check_1 + attention_check_2
)


#remove fails
d_all = d
d = filter(d, attention_check_fails == 0)

#distribution
d_all$attention_check_fails %>% table2()

d_all %>% select(contains("attention_check")) %>% describe2()

#ids of fails
# d_all %>% filter(attention_check_fails > 0) %>% select(Participant_id, Set_the_slider_to_not_scientific, Set_the_slider_in_the_middle, attention_check_1, attention_check_2, attention_check_fails) %>% View("Rejected IDs")

Demographics

d$race = d$Ethnicity_simplified %>% fct_relevel("White") %>% recode_nonconsent()
d$race %>% table2()

d$white = d$race == "White"

d$Age %<>% as.numeric()
d$Age %>% describe2()

d$Sex %<>% recode_nonconsent()
d$Sex %>% table2()

d$vote_US_2020 = d$Who_did_you_vote_for_in_the_last_presidential_election_2020 %>% recode_nonconsent() %>% fct_relevel("Democrats (Biden)")
d$vote_US_2020 %>% table2()

#variable table
d_table = df_var_table(d)

Analysis

Score scientific knowledge test

science_items = d %>% select(Which_of_the_following_is_an_example_of_genetic_engineering:What_does_DNA_stand_for)
science_items_keys = c(
  "Inserting a gene into plants that makes them resistant to insects.",
  "Herbert Spencer",
  "Nitrogen",
  "Aspirin",
  "Astrology",
  "Jonas Salk",
  "Amplitude or height",
  "About 50%.",
  "The Gaussian distribution.",
  "The tilt of the Earth’s axis in relation to the sun",
  "66 million years",
  "Deoxyribonucleic acid"
)

#score to binary
science_items_scored = score_items(science_items, science_items_keys)

#IRT
science_irt = mirt(
  science_items_scored,
  model = 1
)

## Iteration: 1, Log-Lik: -3019.988, Max-Change: 0.35353Iteration: 2, Log-Lik: -2992.628, Max-Change: 0.22653Iteration: 3, Log-Lik: -2985.826, Max-Change: 0.15889Iteration: 4, Log-Lik: -2983.942, Max-Change: 0.08594Iteration: 5, Log-Lik: -2983.454, Max-Change: 0.05019Iteration: 6, Log-Lik: -2983.300, Max-Change: 0.03089Iteration: 7, Log-Lik: -2983.236, Max-Change: 0.01418Iteration: 8, Log-Lik: -2983.226, Max-Change: 0.00889Iteration: 9, Log-Lik: -2983.222, Max-Change: 0.00558Iteration: 10, Log-Lik: -2983.220, Max-Change: 0.00135Iteration: 11, Log-Lik: -2983.220, Max-Change: 0.00224Iteration: 12, Log-Lik: -2983.220, Max-Change: 0.00019Iteration: 13, Log-Lik: -2983.220, Max-Change: 0.00018Iteration: 14, Log-Lik: -2983.220, Max-Change: 0.00016Iteration: 15, Log-Lik: -2983.220, Max-Change: 0.00015Iteration: 16, Log-Lik: -2983.220, Max-Change: 0.00032Iteration: 17, Log-Lik: -2983.220, Max-Change: 0.00004

science_irt

## 
## Call:
## mirt(data = science_items_scored, model = 1)
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 0.0001 tolerance after 17 EM iterations.
## mirt version: 1.41.8 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -2983
## Estimated parameters: 24 
## AIC = 6014
## BIC = 6116; SABIC = 6040
## G2 (4071) = 862, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN

science_irt %>% summary()

##                                                                                                                                              F1
## Which_of_the_following_is_an_example_of_genetic_engineering                                                                               0.508
## Who_originated_the_phrase_survival_of_the_fittest                                                                                         0.340
## Which_gas_makes_up_most_of_the_Earth_s_atmosphere                                                                                         0.728
## Which_over_the_counter_per_U_S_regulations_drug_do_doctors_recommend_that_people_take_to_help_prevent_heart_attacks                       0.231
## Which_of_these_terms_is_defined_as_the_study_of_how_the_positions_of_stars_and_planets_can_influence_human_behavior                       0.312
## Which_of_these_people_developed_the_polio_vaccine                                                                                         0.601
## The_loudness_of_a_sound_is_determined_by_what_property_of_a_sound_wave_Is_it                                                              0.664
## Imagine_a_school_class_with_23_children_What_is_the_probability_that_two_or_more_of_them_have_their_birthdays_on_the_same_day_of_the_year 0.589
## The_normal_distribution_also_has_another_name_Which_one_is_the_right_name                                                                 0.604
## What_is_the_main_cause_of_seasons_on_the_Earth                                                                                            0.523
## Scientists_believe_that_most_dinosaurs_were_killed_following_a_large_astroid_impact_How_long_ago_was_this_approximately                   0.600
## What_does_DNA_stand_for                                                                                                                   0.563
##                                                                                                                                               h2
## Which_of_the_following_is_an_example_of_genetic_engineering                                                                               0.2577
## Who_originated_the_phrase_survival_of_the_fittest                                                                                         0.1155
## Which_gas_makes_up_most_of_the_Earth_s_atmosphere                                                                                         0.5295
## Which_over_the_counter_per_U_S_regulations_drug_do_doctors_recommend_that_people_take_to_help_prevent_heart_attacks                       0.0534
## Which_of_these_terms_is_defined_as_the_study_of_how_the_positions_of_stars_and_planets_can_influence_human_behavior                       0.0974
## Which_of_these_people_developed_the_polio_vaccine                                                                                         0.3614
## The_loudness_of_a_sound_is_determined_by_what_property_of_a_sound_wave_Is_it                                                              0.4403
## Imagine_a_school_class_with_23_children_What_is_the_probability_that_two_or_more_of_them_have_their_birthdays_on_the_same_day_of_the_year 0.3474
## The_normal_distribution_also_has_another_name_Which_one_is_the_right_name                                                                 0.3653
## What_is_the_main_cause_of_seasons_on_the_Earth                                                                                            0.2738
## Scientists_believe_that_most_dinosaurs_were_killed_following_a_large_astroid_impact_How_long_ago_was_this_approximately                   0.3599
## What_does_DNA_stand_for                                                                                                                   0.3171
## 
## SS loadings:  3.52 
## Proportion Var:  0.293 
## 
## Factor correlations: 
## 
##    F1
## F1  1

#scores
science_irt_scores = fscores(science_irt, full.scores = T, full.scores.SE = T)

#reliability
empirical_rxx(science_irt_scores)

##    F1 
## 0.705

#save for use in models
d$science_knowledge = science_irt_scores[, 1] %>% standardize(focal_group = d$race == "White")

#simple score
d$science_knowledge_sum = science_items_scored %>% rowSums()

#distribution
d$science_knowledge %>% 
  GG_denhist()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

d %>% 
  ggplot(aes(science_knowledge_sum)) +
  geom_bar() +
  scale_x_continuous(breaks = 0:12)

#IQ scale
sciknow_IQ = make_norms(
  d$science_knowledge,
  d$Age,
  norm_group = d$race == "White"
)

## No detected linear effect of age on the score (p = 0.756). Model not used.
## No detected variance effect of age on the score (p = 0.833). Model not used.

d$science_knowledge_IQ = sciknow_IQ$data$IQ

d %>% select(science_knowledge, science_knowledge_sum, science_knowledge_IQ) %>% describe2()

#group differences
describe2(d %>% select(science_knowledge, science_knowledge_sum, science_knowledge_IQ), d$white)

GG_denhist(d, "science_knowledge_IQ", "white")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Controversial questions

#question ratings
questions_ratings = d %>% select(Whether_there_are_genetic_causes_of_homosexuality_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is:Whether_immigration_will_make_the_country_worse_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is)

#recode to numeric
questions_ratings_num = map_df(questions_ratings, recode_likert_taboo)

#questions data
questions = tibble(
  question = questions_ratings %>% names() %>% str_replace("_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is", "") %>% str_clean(),
  taboo = questions_ratings_num %>% colMeans()
)

#scores by moderators
#political placement left-right
tmp = score_by(questions_ratings_num, d$Relative_to_the_Democratic_party_and_the_Republican_party_where_would_you_put_yourself)
questions$taboo_repub_placement = tmp[2, -1] %>% as.numeric()
questions$taboo_dem_placement = tmp[1, -1] %>% as.numeric()

#voting
tmp = score_by(questions_ratings_num, d$Who_did_you_vote_for_in_the_last_presidential_election_2020)
questions$taboo_repub_vote = tmp[5, -1] %>% as.numeric()
questions$taboo_dem_vote = tmp[1, -1] %>% as.numeric()

#science knowledge
tmp = score_by(questions_ratings_num, d$science_knowledge, extrapolate_to = c(-1, 1))
questions$taboo_low_sci_know = tmp[1, -1] %>% as.numeric()
questions$taboo_high_sci_know = tmp[2, -1] %>% as.numeric()

#sex
tmp = score_by(questions_ratings_num, d$Sex)
questions$taboo_women = tmp[1, -1] %>% as.numeric()
questions$taboo_men = tmp[2, -1] %>% as.numeric()

#age
tmp = score_by(questions_ratings_num, d$Age, extrapolate_to = c(20, 60))
questions$taboo_young = tmp[1, -1] %>% as.numeric()
questions$taboo_old = tmp[2, -1] %>% as.numeric()

#race
tmp = score_by(questions_ratings_num, d$Ethnicity_simplified)
questions$taboo_whites = tmp[5, -1] %>% as.numeric()
questions$taboo_blacks = tmp[2, -1] %>% as.numeric()
questions$taboo_asians = tmp[1, -1] %>% as.numeric()
questions$taboo_mixed = tmp[3, -1] %>% as.numeric()


#plot tabooness
questions %>% 
  mutate(question = fct_reorder(question, taboo)) %>% 
  ggplot(aes(taboo, question)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5))

GG_save("figs/taboo all.png")

#ordinal plot
questions_ratings %>% 
  mutate(
    id = 1:n()
  ) %>% 
  pivot_longer(cols = -id) %>% 
  mutate(
    value = fct_relevel(value, "Not at all taboo", "A little taboo", "Somewhat taboo", "Very taboo", "Extremely taboo") %>% as.ordered(),
    value_num = value %>% recode_likert_taboo(),
    question = name %>% str_replace("_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is", "") %>% str_clean() %>% str_trim() %>% fct_reorder(.x = value_num, .fun = mean)
  ) %>% 
  ggplot(aes(question, fill = value)) +
  geom_bar(position = "stack") +
  coord_flip()

GG_save("figs/taboo all ord.png")

#by vote
questions %>% select(taboo_dem_vote, taboo_repub_vote, question) %>% 
  pivot_longer(cols = -question) %>% 
  mutate(
    question = fct_reorder(question, value),
    name = fct_recode(name, "Democrat" = "taboo_dem_vote", "Republican" = "taboo_repub_vote")
    ) %>% 
  ggplot(aes(value, question, color = name)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5)) +
  scale_color_discrete("Vote")

GG_save("figs/taboo vote.png")

#by science knowledge
questions %>% select(taboo_low_sci_know, taboo_high_sci_know, question) %>% 
  pivot_longer(cols = -question) %>% 
  mutate(
    question = fct_reorder(question, value),
    name = fct_recode(name, "Low" = "taboo_low_sci_know", "High" = "taboo_high_sci_know")
    ) %>% 
  ggplot(aes(value, question, color = name)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5)) +
  scale_color_discrete("Science knowledge")

GG_save("figs/taboo sci.png")

#by sex
questions %>% select(taboo_women, taboo_men, question) %>% 
  pivot_longer(cols = -question) %>% 
  mutate(
    question = fct_reorder(question, value),
    name = fct_recode(name, "Men" = "taboo_men", "Women" = "taboo_women")
    ) %>% 
  ggplot(aes(value, question, color = name)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5)) +
  scale_color_discrete("Sex")

GG_save("figs/taboo sex.png")

#by race
questions %>% select(taboo_whites:taboo_mixed, question) %>% 
  pivot_longer(cols = -question) %>% 
  mutate(
    question = fct_reorder(question, value),
    name = fct_recode(name, "White" = "taboo_whites", "Black" = "taboo_blacks", "Asian" = "taboo_asians", "Mixed" = "taboo_mixed")
    ) %>% 
  ggplot(aes(value, question, color = name)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5)) +
  scale_color_discrete("Race")

GG_save("figs/taboo race.png")

#by age
questions %>% select(taboo_young:taboo_old, question) %>% 
  pivot_longer(cols = -question) %>% 
  mutate(
    question = fct_reorder(question, value),
    name = name %>% fct_recode("Young" = "taboo_young", "Old" = "taboo_old")
    ) %>% 
  ggplot(aes(value, question, color = name)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 5)) +
  scale_color_discrete("Age")

GG_save("figs/taboo age.png")

#all taboo correlations
questions %>% 
  select(starts_with("taboo_")) %>% 
  wtd.cors() %>% 
  GG_heatmap(font_size = 3)

Meta

#versions
write_sessioninfo()

## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] patchwork_1.2.0       googlesheets4_1.1.1   mirt_1.41.8          
##  [4] lattice_0.22-5        rms_6.8-0             kirkegaard_2024-04-01
##  [7] psych_2.4.3           assertthat_0.2.1      weights_1.0.4        
## [10] Hmisc_5.1-2           magrittr_2.0.3        lubridate_1.9.3      
## [13] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [16] purrr_1.0.2           readr_2.1.5           tidyr_1.3.1          
## [19] tibble_3.2.1          ggplot2_3.5.0         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] mnormt_2.1.1         pbapply_1.7-2        gridExtra_2.3       
##   [4] permute_0.9-7        sandwich_3.1-0       rlang_1.1.3         
##   [7] multcomp_1.4-25      polspline_1.1.24     compiler_4.3.3      
##  [10] mgcv_1.9-1           gdata_3.0.0          systemfonts_1.0.6   
##  [13] vctrs_0.6.5          quantreg_5.97        pkgconfig_2.0.3     
##  [16] shape_1.4.6.1        fastmap_1.1.1        backports_1.4.1     
##  [19] labeling_0.4.3       utf8_1.2.4           rmarkdown_2.26      
##  [22] tzdb_0.4.0           nloptr_2.0.3         ragg_1.3.0          
##  [25] MatrixModels_0.5-3   xfun_0.43            glmnet_4.1-8        
##  [28] jomo_2.7-6           cachem_1.0.8         jsonlite_1.8.8      
##  [31] highr_0.10           pan_1.9              Deriv_4.1.3         
##  [34] broom_1.0.5          parallel_4.3.3       cluster_2.1.6       
##  [37] R6_2.5.1             bslib_0.7.0          stringi_1.8.3       
##  [40] boot_1.3-30          rpart_4.1.23         cellranger_1.1.0    
##  [43] jquerylib_0.1.4      Rcpp_1.0.12          iterators_1.0.14    
##  [46] knitr_1.45           zoo_1.8-12           base64enc_0.1-3     
##  [49] Matrix_1.6-5         splines_4.3.3        nnet_7.3-19         
##  [52] timechange_0.3.0     tidyselect_1.2.1     rstudioapi_0.16.0   
##  [55] yaml_2.3.8           vegan_2.6-4          codetools_0.2-19    
##  [58] dcurver_0.9.2        plyr_1.8.9           withr_3.0.0         
##  [61] evaluate_0.23        foreign_0.8-86       survival_3.5-8      
##  [64] pillar_1.9.0         mice_3.16.0          checkmate_2.3.1     
##  [67] foreach_1.5.2        generics_0.1.3       hms_1.1.3           
##  [70] munsell_0.5.1        scales_1.3.0         minqa_1.2.6         
##  [73] gtools_3.9.5         glue_1.7.0           tools_4.3.3         
##  [76] data.table_1.15.4    lme4_1.1-35.2        SparseM_1.81        
##  [79] fs_1.6.3             mvtnorm_1.2-4        grid_4.3.3          
##  [82] colorspace_2.1-0     nlme_3.1-163         googledrive_2.1.1   
##  [85] htmlTable_2.4.2      Formula_1.2-5        cli_3.6.2           
##  [88] textshaping_0.3.7    fansi_1.0.6          viridisLite_0.4.2   
##  [91] gargle_1.5.2         gtable_0.3.4         sass_0.4.9          
##  [94] digest_0.6.35        GPArotation_2024.3-1 TH.data_1.1-2       
##  [97] farver_2.1.1         htmlwidgets_1.6.4    htmltools_0.5.8.1   
## [100] lifecycle_1.0.4      mitml_0.4-5          MASS_7.3-60

#save final data
d %>% 
  write_rds("data/data out.rds")

#upload to OSF
#avoid uploading the data in case they freak out again
if (F) {
  library(osfr)
  
  #auth
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/ys9w8/")
  
  #upload files
  #overwrite existing (versioning)
  osf_upload(osf_proj, conflicts = "overwrite", 
             path = c(
               "figs",
               "data",
               "taboo.html",
               "taboo.Rmd",
             ))
}

Taboos in science

17 April, 2024

Init

Functions

Data

Attention checks

Demographics

Analysis

Score scientific knowledge test

Controversial questions

Meta