#global options
options(
digits = 2,
contrasts = c("contr.treatment", "contr.treatment")
)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
rms,
mirt,
googlesheets4,
patchwork
)
## Loading required package: stats4
## Loading required package: lattice
theme_set(theme_bw())
options(
digits = 3,
scipen = 999
)
#code likert to numbers
recode_likert_taboo = function(x) {
mapvalues(x, from = c("Not at all taboo", "A little taboo", "Somewhat taboo", "Very taboo", "Extremely taboo"), to = 1:5) %>% as.numeric()
}
#recode nonconsent to NA
recode_nonconsent = function(x) {
mapvalues(x, from = "CONSENT_REVOKED", to = NA, warn_missing = F)
}
#likert
recode_likert = function(x) {
mapvalues(x, from = c("Totally disagree", "Somewhat disagree", "Neutral", "Somewhat agree", "Totally agree"), to = c(1:5)) %>% as.numeric()
}
#for privacy reasons, we only do this once, and then reuse the merged file
if (F) {
#prolific
prolific_meta = read_csv("data_sensitive/prolific_export_62bfea38712944a2d934e957.csv") %>%
df_legalize_names()
prolific_data = read_csv("data_sensitive/20220830035106-SurveyExport.csv") %>%
df_legalize_names()
#join to overlap (we get rid of test data this way too)
d = left_join(
prolific_meta,
prolific_data %>% filter(Status == "Complete"),
by = c("Participant_id" = "Please_enter_your_Prolific_ID")
)
#no dups
assert_that(!anyDuplicated(d$Participant_id))
#remove personal identifiers
d %>%
select(
-Submission_id,
-Participant_id,
-Referer,
-SessionID,
-IP_Address,
-User_Agent
) %>%
write_rds("data/prolific_merged.rds")
}
d = read_rds("data/prolific_merged.rds")
#fields metadata
d %<>% mutate(
science_one_way_knowing = Science_is_just_one_way_of_knowing_no_more_valid_or_accurate_than_other_approaches_to_knowledge %>% recode_nonconsent() %>% recode_likert()
)
attention_check_vars = c("Set_the_slider_to_not_scientific",
"Set_the_slider_in_the_middle")
d %<>% mutate(
attention_check_1 = !Set_the_slider_to_not_scientific <= 10,
attention_check_2 = !is_between(Set_the_slider_in_the_middle, a = 40, b = 60),
attention_check_fails = attention_check_1 + attention_check_2
)
#remove fails
d_all = d
d = filter(d, attention_check_fails == 0)
#distribution
d_all$attention_check_fails %>% table2()
d_all %>% select(contains("attention_check")) %>% describe2()
#ids of fails
# d_all %>% filter(attention_check_fails > 0) %>% select(Participant_id, Set_the_slider_to_not_scientific, Set_the_slider_in_the_middle, attention_check_1, attention_check_2, attention_check_fails) %>% View("Rejected IDs")
d$race = d$Ethnicity_simplified %>% fct_relevel("White") %>% recode_nonconsent()
d$race %>% table2()
d$white = d$race == "White"
d$Age %<>% as.numeric()
d$Age %>% describe2()
d$Sex %<>% recode_nonconsent()
d$Sex %>% table2()
d$vote_US_2020 = d$Who_did_you_vote_for_in_the_last_presidential_election_2020 %>% recode_nonconsent() %>% fct_relevel("Democrats (Biden)")
d$vote_US_2020 %>% table2()
#variable table
d_table = df_var_table(d)
science_items = d %>% select(Which_of_the_following_is_an_example_of_genetic_engineering:What_does_DNA_stand_for)
science_items_keys = c(
"Inserting a gene into plants that makes them resistant to insects.",
"Herbert Spencer",
"Nitrogen",
"Aspirin",
"Astrology",
"Jonas Salk",
"Amplitude or height",
"About 50%.",
"The Gaussian distribution.",
"The tilt of the Earth’s axis in relation to the sun",
"66 million years",
"Deoxyribonucleic acid"
)
#score to binary
science_items_scored = score_items(science_items, science_items_keys)
#IRT
science_irt = mirt(
science_items_scored,
model = 1
)
## Iteration: 1, Log-Lik: -3019.988, Max-Change: 0.35353Iteration: 2, Log-Lik: -2992.628, Max-Change: 0.22653Iteration: 3, Log-Lik: -2985.826, Max-Change: 0.15889Iteration: 4, Log-Lik: -2983.942, Max-Change: 0.08594Iteration: 5, Log-Lik: -2983.454, Max-Change: 0.05019Iteration: 6, Log-Lik: -2983.300, Max-Change: 0.03089Iteration: 7, Log-Lik: -2983.236, Max-Change: 0.01418Iteration: 8, Log-Lik: -2983.226, Max-Change: 0.00889Iteration: 9, Log-Lik: -2983.222, Max-Change: 0.00558Iteration: 10, Log-Lik: -2983.220, Max-Change: 0.00135Iteration: 11, Log-Lik: -2983.220, Max-Change: 0.00224Iteration: 12, Log-Lik: -2983.220, Max-Change: 0.00019Iteration: 13, Log-Lik: -2983.220, Max-Change: 0.00018Iteration: 14, Log-Lik: -2983.220, Max-Change: 0.00016Iteration: 15, Log-Lik: -2983.220, Max-Change: 0.00015Iteration: 16, Log-Lik: -2983.220, Max-Change: 0.00032Iteration: 17, Log-Lik: -2983.220, Max-Change: 0.00004
science_irt
##
## Call:
## mirt(data = science_items_scored, model = 1)
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 0.0001 tolerance after 17 EM iterations.
## mirt version: 1.41.8
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -2983
## Estimated parameters: 24
## AIC = 6014
## BIC = 6116; SABIC = 6040
## G2 (4071) = 862, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
science_irt %>% summary()
## F1
## Which_of_the_following_is_an_example_of_genetic_engineering 0.508
## Who_originated_the_phrase_survival_of_the_fittest 0.340
## Which_gas_makes_up_most_of_the_Earth_s_atmosphere 0.728
## Which_over_the_counter_per_U_S_regulations_drug_do_doctors_recommend_that_people_take_to_help_prevent_heart_attacks 0.231
## Which_of_these_terms_is_defined_as_the_study_of_how_the_positions_of_stars_and_planets_can_influence_human_behavior 0.312
## Which_of_these_people_developed_the_polio_vaccine 0.601
## The_loudness_of_a_sound_is_determined_by_what_property_of_a_sound_wave_Is_it 0.664
## Imagine_a_school_class_with_23_children_What_is_the_probability_that_two_or_more_of_them_have_their_birthdays_on_the_same_day_of_the_year 0.589
## The_normal_distribution_also_has_another_name_Which_one_is_the_right_name 0.604
## What_is_the_main_cause_of_seasons_on_the_Earth 0.523
## Scientists_believe_that_most_dinosaurs_were_killed_following_a_large_astroid_impact_How_long_ago_was_this_approximately 0.600
## What_does_DNA_stand_for 0.563
## h2
## Which_of_the_following_is_an_example_of_genetic_engineering 0.2577
## Who_originated_the_phrase_survival_of_the_fittest 0.1155
## Which_gas_makes_up_most_of_the_Earth_s_atmosphere 0.5295
## Which_over_the_counter_per_U_S_regulations_drug_do_doctors_recommend_that_people_take_to_help_prevent_heart_attacks 0.0534
## Which_of_these_terms_is_defined_as_the_study_of_how_the_positions_of_stars_and_planets_can_influence_human_behavior 0.0974
## Which_of_these_people_developed_the_polio_vaccine 0.3614
## The_loudness_of_a_sound_is_determined_by_what_property_of_a_sound_wave_Is_it 0.4403
## Imagine_a_school_class_with_23_children_What_is_the_probability_that_two_or_more_of_them_have_their_birthdays_on_the_same_day_of_the_year 0.3474
## The_normal_distribution_also_has_another_name_Which_one_is_the_right_name 0.3653
## What_is_the_main_cause_of_seasons_on_the_Earth 0.2738
## Scientists_believe_that_most_dinosaurs_were_killed_following_a_large_astroid_impact_How_long_ago_was_this_approximately 0.3599
## What_does_DNA_stand_for 0.3171
##
## SS loadings: 3.52
## Proportion Var: 0.293
##
## Factor correlations:
##
## F1
## F1 1
#scores
science_irt_scores = fscores(science_irt, full.scores = T, full.scores.SE = T)
#reliability
empirical_rxx(science_irt_scores)
## F1
## 0.705
#save for use in models
d$science_knowledge = science_irt_scores[, 1] %>% standardize(focal_group = d$race == "White")
#simple score
d$science_knowledge_sum = science_items_scored %>% rowSums()
#distribution
d$science_knowledge %>%
GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d %>%
ggplot(aes(science_knowledge_sum)) +
geom_bar() +
scale_x_continuous(breaks = 0:12)
#IQ scale
sciknow_IQ = make_norms(
d$science_knowledge,
d$Age,
norm_group = d$race == "White"
)
## No detected linear effect of age on the score (p = 0.756). Model not used.
## No detected variance effect of age on the score (p = 0.833). Model not used.
d$science_knowledge_IQ = sciknow_IQ$data$IQ
d %>% select(science_knowledge, science_knowledge_sum, science_knowledge_IQ) %>% describe2()
#group differences
describe2(d %>% select(science_knowledge, science_knowledge_sum, science_knowledge_IQ), d$white)
GG_denhist(d, "science_knowledge_IQ", "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#question ratings
questions_ratings = d %>% select(Whether_there_are_genetic_causes_of_homosexuality_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is:Whether_immigration_will_make_the_country_worse_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is)
#recode to numeric
questions_ratings_num = map_df(questions_ratings, recode_likert_taboo)
#questions data
questions = tibble(
question = questions_ratings %>% names() %>% str_replace("_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is", "") %>% str_clean(),
taboo = questions_ratings_num %>% colMeans()
)
#scores by moderators
#political placement left-right
tmp = score_by(questions_ratings_num, d$Relative_to_the_Democratic_party_and_the_Republican_party_where_would_you_put_yourself)
questions$taboo_repub_placement = tmp[2, -1] %>% as.numeric()
questions$taboo_dem_placement = tmp[1, -1] %>% as.numeric()
#voting
tmp = score_by(questions_ratings_num, d$Who_did_you_vote_for_in_the_last_presidential_election_2020)
questions$taboo_repub_vote = tmp[5, -1] %>% as.numeric()
questions$taboo_dem_vote = tmp[1, -1] %>% as.numeric()
#science knowledge
tmp = score_by(questions_ratings_num, d$science_knowledge, extrapolate_to = c(-1, 1))
questions$taboo_low_sci_know = tmp[1, -1] %>% as.numeric()
questions$taboo_high_sci_know = tmp[2, -1] %>% as.numeric()
#sex
tmp = score_by(questions_ratings_num, d$Sex)
questions$taboo_women = tmp[1, -1] %>% as.numeric()
questions$taboo_men = tmp[2, -1] %>% as.numeric()
#age
tmp = score_by(questions_ratings_num, d$Age, extrapolate_to = c(20, 60))
questions$taboo_young = tmp[1, -1] %>% as.numeric()
questions$taboo_old = tmp[2, -1] %>% as.numeric()
#race
tmp = score_by(questions_ratings_num, d$Ethnicity_simplified)
questions$taboo_whites = tmp[5, -1] %>% as.numeric()
questions$taboo_blacks = tmp[2, -1] %>% as.numeric()
questions$taboo_asians = tmp[1, -1] %>% as.numeric()
questions$taboo_mixed = tmp[3, -1] %>% as.numeric()
#plot tabooness
questions %>%
mutate(question = fct_reorder(question, taboo)) %>%
ggplot(aes(taboo, question)) +
geom_point() +
scale_x_continuous(limits = c(1, 5))
GG_save("figs/taboo all.png")
#ordinal plot
questions_ratings %>%
mutate(
id = 1:n()
) %>%
pivot_longer(cols = -id) %>%
mutate(
value = fct_relevel(value, "Not at all taboo", "A little taboo", "Somewhat taboo", "Very taboo", "Extremely taboo") %>% as.ordered(),
value_num = value %>% recode_likert_taboo(),
question = name %>% str_replace("_Of_the_following_questions_please_indicate_how_taboo_you_think_the_question_is", "") %>% str_clean() %>% str_trim() %>% fct_reorder(.x = value_num, .fun = mean)
) %>%
ggplot(aes(question, fill = value)) +
geom_bar(position = "stack") +
coord_flip()
GG_save("figs/taboo all ord.png")
#by vote
questions %>% select(taboo_dem_vote, taboo_repub_vote, question) %>%
pivot_longer(cols = -question) %>%
mutate(
question = fct_reorder(question, value),
name = fct_recode(name, "Democrat" = "taboo_dem_vote", "Republican" = "taboo_repub_vote")
) %>%
ggplot(aes(value, question, color = name)) +
geom_point() +
scale_x_continuous(limits = c(1, 5)) +
scale_color_discrete("Vote")
GG_save("figs/taboo vote.png")
#by science knowledge
questions %>% select(taboo_low_sci_know, taboo_high_sci_know, question) %>%
pivot_longer(cols = -question) %>%
mutate(
question = fct_reorder(question, value),
name = fct_recode(name, "Low" = "taboo_low_sci_know", "High" = "taboo_high_sci_know")
) %>%
ggplot(aes(value, question, color = name)) +
geom_point() +
scale_x_continuous(limits = c(1, 5)) +
scale_color_discrete("Science knowledge")
GG_save("figs/taboo sci.png")
#by sex
questions %>% select(taboo_women, taboo_men, question) %>%
pivot_longer(cols = -question) %>%
mutate(
question = fct_reorder(question, value),
name = fct_recode(name, "Men" = "taboo_men", "Women" = "taboo_women")
) %>%
ggplot(aes(value, question, color = name)) +
geom_point() +
scale_x_continuous(limits = c(1, 5)) +
scale_color_discrete("Sex")
GG_save("figs/taboo sex.png")
#by race
questions %>% select(taboo_whites:taboo_mixed, question) %>%
pivot_longer(cols = -question) %>%
mutate(
question = fct_reorder(question, value),
name = fct_recode(name, "White" = "taboo_whites", "Black" = "taboo_blacks", "Asian" = "taboo_asians", "Mixed" = "taboo_mixed")
) %>%
ggplot(aes(value, question, color = name)) +
geom_point() +
scale_x_continuous(limits = c(1, 5)) +
scale_color_discrete("Race")
GG_save("figs/taboo race.png")
#by age
questions %>% select(taboo_young:taboo_old, question) %>%
pivot_longer(cols = -question) %>%
mutate(
question = fct_reorder(question, value),
name = name %>% fct_recode("Young" = "taboo_young", "Old" = "taboo_old")
) %>%
ggplot(aes(value, question, color = name)) +
geom_point() +
scale_x_continuous(limits = c(1, 5)) +
scale_color_discrete("Age")
GG_save("figs/taboo age.png")
#all taboo correlations
questions %>%
select(starts_with("taboo_")) %>%
wtd.cors() %>%
GG_heatmap(font_size = 3)
#versions
write_sessioninfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_DK.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_DK.UTF-8 LC_COLLATE=en_DK.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_DK.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] patchwork_1.2.0 googlesheets4_1.1.1 mirt_1.41.8
## [4] lattice_0.22-5 rms_6.8-0 kirkegaard_2024-04-01
## [7] psych_2.4.3 assertthat_0.2.1 weights_1.0.4
## [10] Hmisc_5.1-2 magrittr_2.0.3 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [19] tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 pbapply_1.7-2 gridExtra_2.3
## [4] permute_0.9-7 sandwich_3.1-0 rlang_1.1.3
## [7] multcomp_1.4-25 polspline_1.1.24 compiler_4.3.3
## [10] mgcv_1.9-1 gdata_3.0.0 systemfonts_1.0.6
## [13] vctrs_0.6.5 quantreg_5.97 pkgconfig_2.0.3
## [16] shape_1.4.6.1 fastmap_1.1.1 backports_1.4.1
## [19] labeling_0.4.3 utf8_1.2.4 rmarkdown_2.26
## [22] tzdb_0.4.0 nloptr_2.0.3 ragg_1.3.0
## [25] MatrixModels_0.5-3 xfun_0.43 glmnet_4.1-8
## [28] jomo_2.7-6 cachem_1.0.8 jsonlite_1.8.8
## [31] highr_0.10 pan_1.9 Deriv_4.1.3
## [34] broom_1.0.5 parallel_4.3.3 cluster_2.1.6
## [37] R6_2.5.1 bslib_0.7.0 stringi_1.8.3
## [40] boot_1.3-30 rpart_4.1.23 cellranger_1.1.0
## [43] jquerylib_0.1.4 Rcpp_1.0.12 iterators_1.0.14
## [46] knitr_1.45 zoo_1.8-12 base64enc_0.1-3
## [49] Matrix_1.6-5 splines_4.3.3 nnet_7.3-19
## [52] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.16.0
## [55] yaml_2.3.8 vegan_2.6-4 codetools_0.2-19
## [58] dcurver_0.9.2 plyr_1.8.9 withr_3.0.0
## [61] evaluate_0.23 foreign_0.8-86 survival_3.5-8
## [64] pillar_1.9.0 mice_3.16.0 checkmate_2.3.1
## [67] foreach_1.5.2 generics_0.1.3 hms_1.1.3
## [70] munsell_0.5.1 scales_1.3.0 minqa_1.2.6
## [73] gtools_3.9.5 glue_1.7.0 tools_4.3.3
## [76] data.table_1.15.4 lme4_1.1-35.2 SparseM_1.81
## [79] fs_1.6.3 mvtnorm_1.2-4 grid_4.3.3
## [82] colorspace_2.1-0 nlme_3.1-163 googledrive_2.1.1
## [85] htmlTable_2.4.2 Formula_1.2-5 cli_3.6.2
## [88] textshaping_0.3.7 fansi_1.0.6 viridisLite_0.4.2
## [91] gargle_1.5.2 gtable_0.3.4 sass_0.4.9
## [94] digest_0.6.35 GPArotation_2024.3-1 TH.data_1.1-2
## [97] farver_2.1.1 htmlwidgets_1.6.4 htmltools_0.5.8.1
## [100] lifecycle_1.0.4 mitml_0.4-5 MASS_7.3-60
#save final data
d %>%
write_rds("data/data out.rds")
#upload to OSF
#avoid uploading the data in case they freak out again
if (F) {
library(osfr)
#auth
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/ys9w8/")
#upload files
#overwrite existing (versioning)
osf_upload(osf_proj, conflicts = "overwrite",
path = c(
"figs",
"data",
"taboo.html",
"taboo.Rmd",
))
}