library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
mirt,
googlesheets4,
rms,
ggeffects,
future, furrr
)
## Loading required package: stats4
## Loading required package: lattice
theme_set(theme_bw())
options(
digits = 3
)
mirtCluster()
plan(multisession(workers = 7))
#delete cache and re-run all
renew_all = F
#sensitive data
if (F) {
d_prolific = read_csv("data/prolific_export_64877cb0ca7d649ce538e74d.csv") %>% df_legalize_names()
d_alchemer = read_csv("data/20230618204902-SurveyExport.csv") %>% df_legalize_names() %>% filter(Status == "Complete")
assert_that(!any(duplicated(d_prolific$Participant_id)))
assert_that(!any(duplicated(d_alchemer$Write_in_your_Prolific_ID)))
d_alchemer$Write_in_your_Prolific_ID %>% table2()
#join on prolific id
d = inner_join(
d_prolific,
d_alchemer,
by = c("Participant_id" = "Write_in_your_Prolific_ID")
)
assert_that(!any(duplicated(d$Participant_id)))
d %>%
select(
-Submission_id,
-IP_Address,
-Referer,
-SessionID,
-User_Agent
) %>%
write_rds("data/raw data.rds", compress = "xz")
}
#load prepared data without sensitive information
d = read_rds("data/raw data.rds")
#table
d_vars = df_var_table(d)
d$native_speaker = d$Is_English_one_of_your_native_languages == "Yes"
table2(d$native_speaker)
#recode covariates
d$age = d$How_old_are_you %>% str_match("\\d+") %>% as.numeric()
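The regex keeps the first run of digits in the free-text age answer; a quick illustration with hypothetical inputs:
#str_match() returns a matrix; as.numeric() flattens it (hypothetical example inputs)
str_match(c("25", "25 years old"), "\\d+") %>% as.numeric() #-> 25 25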
d$sex = d$What_is_your_biological_sex %>% mapvalues(from = c("Male (Y chromosome)", "Female (no Y chromosome)"), to = c("Male", "Female")) %>% factor(levels = c("Male", "Female"))
#ethnicity dummies
ethnicity_vars = d %>% select(White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with:Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with) %>% names()
d$white = d$White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$jewish = d$Jewish_Ashkenazi_Sephardic_Misrahi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$black = d$Black_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$hispanic = d$Hispanic_Latino_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$middle_eastern = d$Middle_Eastern_North_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$south_asian = d$South_Asian_Indian_subcontinent_excluding_Pakistani_and_Bangladeshi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$southeast_asian = d$Southeast_Asian_Bangladeshi_Vietnamese_Burmese_etc_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$east_asian = d$East_Asian_Chinese_Korean_Japanese_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$polynesian = d$Polynesian_Pacific_Islander_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$native_american = d$Native_American_Amerindian_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$other_race = d$Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
ethnicity_simple_vars = d %>% select(white:other_race) %>% names()
d$white_only = as.logical(d$white) & (d %>% select(white:other_race) %>% select(-white) %>% map_df(as.logical) %>% rowSums() %>% equals(0))
d$black_only = as.logical(d$black) & (d %>% select(white:other_race) %>% select(-black) %>% map_df(as.logical) %>% rowSums() %>% equals(0))
#combinations
d$race_combos = d %>% select(white:other_race) %>% encode_combinations()
d$race_combos %>% table2()
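encode_combinations() is a kirkegaard helper; as a hedged sketch of the idea (an assumption, not the package's implementation), it labels each row by the set of indicators that are TRUE:
#hedged sketch: one label per row listing the TRUE indicator columns
encode_combinations_sketch = function(df) {
  apply(df, 1, function(r) str_c(names(df)[as.logical(r)], collapse = " + "))
}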
#common combos
d$race_combos_common = d$race_combos %>% fct_lump_min(min = 9)
d$race_combos_common %>% table2()
d$sex %>% table2()
easy_items = d %>% select(silly:cheat)
scoring_key = read_excel("data/answer keys for 155 items.xlsx", col_names = c("word", "correct", "notes"))
scoring_key = bind_rows(
tibble(
word = c("silly", "avoid", "remove", "construct"),
correct = c("childish", "evade", "abolish", "create"),
notes = NA
),
scoring_key
) %>% arrange(word)
#sort data colnames alphabetically
colnames_clean = easy_items %>% colnames() %>% str_clean()
order(colnames_clean)
## [1] 79 40 88 64 61 133 145 12 42 47 140 112 124 6 73 95 118 129
## [19] 87 138 98 30 105 45 49 89 8 156 2 83 52 102 35 116 29 155
## [37] 152 104 55 137 159 7 67 17 51 74 59 109 50 68 4 62 108 46
## [55] 41 131 20 100 111 15 122 151 14 103 126 120 69 132 27 34 10 65
## [73] 22 149 158 5 84 11 16 91 130 76 24 32 113 36 43 125 86 54
## [91] 38 57 94 70 31 107 150 26 80 13 44 136 19 148 99 18 28 60
## [109] 77 142 58 85 78 56 9 48 37 115 71 157 117 153 3 75 96 90
## [127] 92 114 154 53 139 141 1 128 72 110 127 135 119 123 23 101 25 66
## [145] 63 106 144 97 121 21 81 147 146 33 134 143 39 93 82
easy_items_sorted = easy_items[, order(colnames_clean)]
#check matches
vocab_matches = tibble(
scoring_key_word = scoring_key$word,
scoring_key_correct = scoring_key$correct,
data = easy_items_sorted %>% colnames()
)
#stubborn is duplicated, but fortunately in the right order
easy_items_scored = score_items(
easy_items_sorted,
scoring_key$correct
)
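score_items() is also a kirkegaard function; a minimal sketch of the presumed behavior (an assumption: elementwise comparison of each response column against its key entry, with NA responses staying NA):
#hedged sketch of key-based 0/1 scoring
score_items_sketch = function(responses, key) {
  stopifnot(ncol(responses) == length(key))
  map2(responses, key, ~ as.numeric(.x == .y)) %>% as_tibble()
}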
easy_items_table = map_df(easy_items, table2)
#count of correct
d$vocab_sumscore = rowSums(easy_items_scored)
#IRT
vocab_irt = cache_object(filename = "data/vocab_irt.rds", expr = mirt(
easy_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000),
optimizer = "BFGS",
method = "EM",
guess = .20
),
renew = renew_all)
## Cache found, reading object from disk
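cache_object() implements a compute-or-load pattern; a minimal sketch under that assumption, leaning on R's lazy argument evaluation so expr only runs on a cache miss:
#hedged sketch of the cache-or-compute pattern
cache_object_sketch = function(filename, expr, renew = FALSE) {
  if (file.exists(filename) && !renew) {
    message("Cache found, reading object from disk")
    return(read_rds(filename))
  }
  obj = expr #forces evaluation only on a cache miss
  write_rds(obj, filename)
  obj
}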
#results
vocab_irt
##
## Call:
## mirt(data = easy_items_scored, model = 1, itemtype = "2PL", guess = 0.2,
## method = "EM", optimizer = "BFGS", technical = list(NCYCLES = 5000))
##
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 5000 EM iterations.
## mirt version: 1.41.8
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -28464
## Estimated parameters: 318
## AIC = 57565
## BIC = 58904; SABIC = 57895
## G2 (1e+10) = 50753, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
vocab_irt %>% summary()
## F1 h2
## a_specific_number 0.569 0.323
## a_type_of_brass_instrument 0.881 0.776
## a_type_of_drapery 0.764 0.584
## a_type_of_fabric_101 0.780 0.608
## a_type_of_fabric_98 0.786 0.618
## a_type_of_grasshopper 0.873 0.762
## a_type_of_hat 0.827 0.685
## a_type_of_heating_unit 0.638 0.407
## a_type_of_magistrate_position 0.845 0.714
## a_type_of_mathematical_operation 0.631 0.398
## a_type_of_overshoe 0.630 0.397
## a_type_of_plant 0.825 0.681
## a_type_of_religious_teacher 0.847 0.717
## a_type_of_rock 0.501 0.251
## a_type_of_skirts 0.748 0.559
## a_type_of_smokeless_powder 0.595 0.354
## a_type_of_sound 0.622 0.387
## a_type_of_sword 0.510 0.260
## a_type_of_volcanic_crater 0.675 0.456
## a_type_of_wavy_form 0.849 0.721
## actionable_negligence 0.845 0.715
## advisory 0.718 0.516
## advocate 0.733 0.537
## almighty 0.905 0.818
## amazement 0.791 0.626
## amenability 0.844 0.712
## aroma 0.816 0.666
## auspices 0.867 0.753
## avoid 0.807 0.651
## barely_able_to_read_and_write 0.896 0.803
## because 0.778 0.605
## berate 0.885 0.783
## blade 0.764 0.583
## blend 0.716 0.513
## blunder 0.882 0.777
## bow 0.903 0.815
## brief 0.594 0.353
## bring_about 0.864 0.746
## carelessly_or_hastily_put_together 0.743 0.552
## celebration 0.809 0.654
## cheat 0.891 0.793
## cheerful 0.923 0.852
## clay_pigeon_shooting 0.822 0.676
## coarse 0.816 0.665
## collect_or_study_insects 0.756 0.571
## colossal 0.778 0.605
## commotion 0.934 0.873
## complainer 0.898 0.806
## confiscate 0.897 0.805
## congratulate 0.587 0.345
## construct 0.977 0.954
## contemplate 0.628 0.394
## convoy 0.927 0.860
## couch 0.785 0.617
## crease 0.835 0.697
## cunning 0.769 0.591
## deceit 0.651 0.424
## deliberately 0.827 0.684
## deprive 0.679 0.461
## detectable 0.728 0.530
## diatribes 0.777 0.604
## disjoined 0.507 0.257
## disrupt 0.735 0.540
## distinct 0.386 0.149
## divergence 0.898 0.807
## dome 0.677 0.459
## downfall 0.801 0.641
## drink 0.936 0.876
## elite 0.809 0.655
## embarrassment 0.595 0.355
## emphasize 0.830 0.690
## empire 0.640 0.409
## empty 0.781 0.610
## entanglement 0.843 0.710
## environment 0.881 0.776
## evil 0.627 0.393
## excite 0.861 0.741
## farewell 0.948 0.899
## flammable 0.850 0.723
## flatterer 0.914 0.835
## flatteries 0.702 0.493
## forever 0.863 0.744
## frequent 0.602 0.362
## gigantic 0.789 0.622
## girl 0.831 0.691
## goo 0.825 0.680
## goodbye 0.896 0.803
## greed 0.941 0.885
## groan 0.861 0.741
## gruesome 0.750 0.563
## guarantee 0.503 0.253
## gutter 0.792 0.627
## harmfulness 0.593 0.351
## hobby 0.757 0.573
## hut 0.851 0.724
## illness 0.601 0.362
## impromptu 0.827 0.683
## indescribable 0.788 0.620
## intellectual 0.732 0.535
## jargon 0.945 0.892
## knowledgeable 0.885 0.783
## lackadaisical 0.689 0.475
## manager 0.829 0.688
## meal 0.943 0.889
## melodic 0.773 0.598
## mutually 0.858 0.736
## nonsense 0.909 0.827
## nonsensical 0.875 0.766
## not_coveted 0.759 0.577
## pamper 0.861 0.741
## penitentiary 0.711 0.506
## perplexing 0.721 0.520
## persistence 0.757 0.573
## predetermine 0.675 0.456
## pretender 0.824 0.680
## questioning 0.759 0.576
## quickly 0.727 0.529
## rebellious 0.851 0.724
## referee 0.802 0.643
## referendum 0.761 0.579
## relating_to_the_right 0.538 0.289
## relevant 0.833 0.694
## remove 0.990 0.981
## respectful 0.850 0.722
## retailer 0.652 0.426
## retract 0.646 0.418
## ropes 0.837 0.701
## sag 0.746 0.556
## schemer 0.836 0.699
## seize 0.820 0.673
## sensitivity 0.805 0.648
## shadows 0.745 0.555
## silly 0.848 0.719
## sketch 0.750 0.563
## slang 0.868 0.753
## slave 0.471 0.222
## sluggish 0.781 0.610
## somber 0.733 0.537
## spinelessness 0.707 0.499
## sporadic 0.924 0.853
## squad 0.825 0.681
## stagger 0.776 0.602
## stinking 0.940 0.883
## stroll 0.936 0.875
## stubborn_100 0.688 0.473
## stubborn_143 0.731 0.535
## stylish 0.936 0.876
## summit 0.762 0.581
## terminology 0.920 0.847
## the_science_of_speech_sounds 0.839 0.704
## transportation 0.484 0.234
## tyrant 0.888 0.788
## unhealthful 0.684 0.468
## vile 0.922 0.851
## vulgar 0.922 0.850
## wandering 0.583 0.340
## warning 0.746 0.557
## wave 0.804 0.646
## weaponry 0.715 0.512
##
## SS loadings: 98.7
## Proportion Var: 0.62
##
## Factor correlations:
##
## F1
## F1 1
vocab_irt %>% coef(simplify = T)
## $items
## a1 d g u
## a_specific_number 1.177 1.391 0.2 1
## a_type_of_brass_instrument 3.169 3.420 0.2 1
## a_type_of_drapery 2.016 1.367 0.2 1
## a_type_of_fabric_101 2.120 2.970 0.2 1
## a_type_of_fabric_98 2.164 0.886 0.2 1
## a_type_of_grasshopper 3.042 1.783 0.2 1
## a_type_of_hat 2.507 -2.455 0.2 1
## a_type_of_heating_unit 1.411 1.720 0.2 1
## a_type_of_magistrate_position 2.687 2.521 0.2 1
## a_type_of_mathematical_operation 1.383 2.388 0.2 1
## a_type_of_overshoe 1.381 -0.031 0.2 1
## a_type_of_plant 2.487 1.948 0.2 1
## a_type_of_religious_teacher 2.708 1.977 0.2 1
## a_type_of_rock 0.986 2.678 0.2 1
## a_type_of_skirts 1.916 1.509 0.2 1
## a_type_of_smokeless_powder 1.260 0.728 0.2 1
## a_type_of_sound 1.353 0.262 0.2 1
## a_type_of_sword 1.010 -0.184 0.2 1
## a_type_of_volcanic_crater 1.559 1.722 0.2 1
## a_type_of_wavy_form 2.735 -0.244 0.2 1
## actionable_negligence 2.695 1.857 0.2 1
## advisory 1.756 3.504 0.2 1
## advocate 1.834 0.908 0.2 1
## almighty 3.610 4.885 0.2 1
## amazement 2.202 2.542 0.2 1
## amenability 2.678 1.763 0.2 1
## aroma 2.403 5.181 0.2 1
## auspices 2.968 -1.360 0.2 1
## avoid 2.324 7.229 0.2 1
## barely_able_to_read_and_write 3.437 2.667 0.2 1
## because 2.106 2.470 0.2 1
## berate 3.231 2.321 0.2 1
## blade 2.014 3.093 0.2 1
## blend 1.746 2.462 0.2 1
## blunder 3.180 4.087 0.2 1
## bow 3.576 -0.561 0.2 1
## brief 1.258 -0.372 0.2 1
## bring_about 2.918 2.297 0.2 1
## carelessly_or_hastily_put_together 1.888 1.837 0.2 1
## celebration 2.339 0.869 0.2 1
## cheat 3.333 0.789 0.2 1
## cheerful 4.085 9.422 0.2 1
## clay_pigeon_shooting 2.460 3.204 0.2 1
## coarse 2.401 3.195 0.2 1
## collect_or_study_insects 1.965 2.783 0.2 1
## colossal 2.107 1.460 0.2 1
## commotion 4.460 2.993 0.2 1
## complainer 3.473 2.087 0.2 1
## confiscate 3.458 3.275 0.2 1
## congratulate 1.234 1.900 0.2 1
## construct 7.775 24.870 0.2 1
## contemplate 1.373 2.079 0.2 1
## convoy 4.213 1.464 0.2 1
## couch 2.159 0.874 0.2 1
## crease 2.584 1.752 0.2 1
## cunning 2.048 1.127 0.2 1
## deceit 1.460 0.757 0.2 1
## deliberately 2.502 1.571 0.2 1
## deprive 1.573 1.497 0.2 1
## detectable 1.808 3.296 0.2 1
## diatribes 2.103 -0.290 0.2 1
## disjoined 1.002 -0.813 0.2 1
## disrupt 1.843 3.631 0.2 1
## distinct 0.712 1.557 0.2 1
## divergence 3.481 0.581 0.2 1
## dome 1.568 0.671 0.2 1
## downfall 2.275 2.421 0.2 1
## drink 4.520 1.554 0.2 1
## elite 2.345 1.823 0.2 1
## embarrassment 1.261 1.878 0.2 1
## emphasize 2.538 4.240 0.2 1
## empire 1.417 1.215 0.2 1
## empty 2.129 1.847 0.2 1
## entanglement 2.666 -0.851 0.2 1
## environment 3.165 -1.455 0.2 1
## evil 1.370 4.531 0.2 1
## excite 2.877 0.612 0.2 1
## farewell 5.084 6.021 0.2 1
## flammable 2.749 5.679 0.2 1
## flatterer 3.832 2.349 0.2 1
## flatteries 1.679 0.716 0.2 1
## forever 2.905 2.269 0.2 1
## frequent 1.283 3.586 0.2 1
## gigantic 2.182 3.422 0.2 1
## girl 2.544 2.167 0.2 1
## goo 2.483 4.032 0.2 1
## goodbye 3.438 4.060 0.2 1
## greed 4.724 0.061 0.2 1
## groan 2.881 3.030 0.2 1
## gruesome 1.931 1.530 0.2 1
## guarantee 0.990 2.298 0.2 1
## gutter 2.206 3.049 0.2 1
## harmfulness 1.252 1.649 0.2 1
## hobby 1.970 2.290 0.2 1
## hut 2.753 2.546 0.2 1
## illness 1.281 0.538 0.2 1
## impromptu 2.501 -0.236 0.2 1
## indescribable 2.176 3.429 0.2 1
## intellectual 1.827 2.987 0.2 1
## jargon 4.895 5.716 0.2 1
## knowledgeable 3.234 1.859 0.2 1
## lackadaisical 1.620 0.742 0.2 1
## manager 2.527 4.861 0.2 1
## meal 4.805 -0.645 0.2 1
## melodic 2.076 2.032 0.2 1
## mutually 2.839 4.328 0.2 1
## nonsense 3.716 2.797 0.2 1
## nonsensical 3.078 4.863 0.2 1
## not_coveted 1.986 1.730 0.2 1
## pamper 2.882 -1.850 0.2 1
## penitentiary 1.723 2.433 0.2 1
## perplexing 1.773 2.327 0.2 1
## persistence 1.970 1.813 0.2 1
## predetermine 1.557 2.314 0.2 1
## pretender 2.478 5.581 0.2 1
## questioning 1.982 2.258 0.2 1
## quickly 1.803 1.133 0.2 1
## rebellious 2.760 1.886 0.2 1
## referee 2.283 2.429 0.2 1
## referendum 1.994 -1.869 0.2 1
## relating_to_the_right 1.085 0.332 0.2 1
## relevant 2.563 0.430 0.2 1
## remove 12.181 34.206 0.2 1
## respectful 2.741 2.041 0.2 1
## retailer 1.465 1.178 0.2 1
## retract 1.441 1.834 0.2 1
## ropes 2.605 2.114 0.2 1
## sag 1.906 0.479 0.2 1
## schemer 2.594 -0.497 0.2 1
## seize 2.439 2.156 0.2 1
## sensitivity 2.308 0.675 0.2 1
## shadows 1.899 0.416 0.2 1
## silly 2.719 7.500 0.2 1
## sketch 1.931 0.636 0.2 1
## slang 2.970 1.753 0.2 1
## slave 0.910 0.995 0.2 1
## sluggish 2.131 0.706 0.2 1
## somber 1.833 0.183 0.2 1
## spinelessness 1.700 1.428 0.2 1
## sporadic 4.102 0.226 0.2 1
## squad 2.489 6.207 0.2 1
## stagger 2.093 0.936 0.2 1
## stinking 4.678 2.981 0.2 1
## stroll 4.512 0.835 0.2 1
## stubborn_100 1.612 1.691 0.2 1
## stubborn_143 1.824 1.862 0.2 1
## stylish 4.514 -2.432 0.2 1
## summit 2.004 1.892 0.2 1
## terminology 4.003 1.109 0.2 1
## the_science_of_speech_sounds 2.622 3.948 0.2 1
## transportation 0.942 0.140 0.2 1
## tyrant 3.282 -3.729 0.2 1
## unhealthful 1.596 -1.445 0.2 1
## vile 4.064 4.430 0.2 1
## vulgar 4.054 -0.654 0.2 1
## wandering 1.220 -0.417 0.2 1
## warning 1.908 2.148 0.2 1
## wave 2.301 1.495 0.2 1
## weaponry 1.742 1.491 0.2 1
##
## $means
## F1
## 0
##
## $cov
## F1
## F1 1
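The $items block uses mirt's slope-intercept parameterization: a1 is the discrimination, d the easiness intercept, g the fixed lower asymptote (the .20 guessing value), and u the upper asymptote. The implied item response function is P(θ) = g + (u − g)·logistic(a1·θ + d):
#item response function in slope-intercept form
irf = function(theta, a1, d, g = 0, u = 1) g + (u - g) * plogis(a1 * theta + d)
#e.g. 'remove' (a1 = 12.181, d = 34.206, g = .2) is passed at nearly any ability level
irf(c(-4, -2, 0), a1 = 12.181, d = 34.206, g = .2)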
vocab_irt_scores = fscores(vocab_irt, full.scores.SE = T)
empirical_rxx(vocab_irt_scores)
## F1
## 0.97
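empirical_rxx() presumably uses the standard definition, score variance over score variance plus mean squared standard error; by hand from the fscores output:
#hedged by-hand version of the empirical reliability above, assuming the usual formula
theta_hat = vocab_irt_scores[, "F1"]
theta_se = vocab_irt_scores[, "SE_F1"]
var(theta_hat) / (var(theta_hat) + mean(theta_se^2))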
plot(vocab_irt, type = "rxx")
d$g_easy = vocab_irt_scores[, 1] %>% standardize()
vocab_item_stats = itemstats(easy_items_scored)
vocab_item_stats$itemstats$g_loading = vocab_irt %>% summary(verbose = F) %>% .$rotF %>% .[, 1]
vocab_item_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
vocab_item_stats$itemstats$mean %>% describe2()
GG_scatter(d, "vocab_sumscore", "g_easy")
## `geom_smooth()` using formula = 'y ~ x'
GG_denhist(d, "vocab_sumscore")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_denhist(d, "g_easy")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d$vocab_sumscore_estimate = d$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct
d$vocab_ranking_estimate = d$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did
d %>% select(
vocab_sumscore,
g_easy,
vocab_sumscore_estimate,
vocab_ranking_estimate
) %>% wtd.cors()
## vocab_sumscore g_easy vocab_sumscore_estimate
## vocab_sumscore 1.000 0.962 0.612
## g_easy 0.962 1.000 0.629
## vocab_sumscore_estimate 0.612 0.629 1.000
## vocab_ranking_estimate 0.385 0.406 0.769
## vocab_ranking_estimate
## vocab_sumscore 0.385
## g_easy 0.406
## vocab_sumscore_estimate 0.769
## vocab_ranking_estimate 1.000
GG_scatter(d, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'
#restandardize g
d$g_easy_ageadj = resid(ols(g_easy ~ rcs(age), data = d)) %>% unname()
## number of knots in rcs defaulting to 5
d$g_easy_ageadj_z = standardize(d$g_easy_ageadj, focal_group = d$white_only)
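standardize() with a focal_group argument is a kirkegaard helper; the assumption here is that it centers and scales by the focal group's mean and SD, so the white-only subset ends up at mean 0, SD 1. A sketch under that assumption:
#hedged sketch of focal-group standardization
standardize_focal_sketch = function(x, focal) {
  (x - mean(x[focal], na.rm = TRUE)) / sd(x[focal], na.rm = TRUE)
}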
hard_items = read_csv("data/follow up 20231029043500-SurveyExport.csv") %>% filter(Status == "Complete")
## New names:
## Rows: 475 Columns: 373
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (360): Status, Language, Referer, SessionID, User Agent, IP Address, Co...
## dbl    (7): Response ID, Longitude, Latitude, New Hidden Value...206, New Hi...
## lgl    (4): Contact ID, Legacy Comments, Comments, Tags
## dttm   (2): Time Started, Date Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...23`
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...90`
## • `New Hidden Value` -> `New Hidden Value...206`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong
## together...279`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong
## together...283`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that
## belong together...336`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that
## belong together...361`
## • `New Hidden Value` -> `New Hidden Value...367`
## • `New Hidden Value` -> `New Hidden Value...372`
## • `New Hidden Value` -> `New Hidden Value...373`
hard_items_meta = read_csv("data/follow up prolific_export_649a005bfc9bd0688f8e3304.csv")
## Rows: 465 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Submission id, Participant id, Status, Completion code, Age, Sex,...
## dbl (2): Time taken, Total approvals
## dttm (4): Started at, Completed at, Reviewed at, Archived at
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hard_items_var_list = df_var_table(hard_items)
#only completed data, only overlap
hard_items_meta %<>% filter(`Participant id` %in% hard_items$`Write in your Prolific ID:`)
hard_items %<>% filter(`Write in your Prolific ID:` %in% hard_items_meta$`Participant id`)
#no dups
assert_that(!anyDuplicated(hard_items$`Write in your Prolific ID:`))
## [1] TRUE
assert_that(!anyDuplicated(hard_items_meta$`Participant id`))
## [1] TRUE
#subset items
hard_items_2of5 = hard_items %>% select(21:205)
hard_items_3of5 = hard_items %>% select(207:366)
hard_items_1of5 = hard_items %>% select(368:371)
#score them
#the 1st option is always the correct one
#but the CSV export doesn't show the option order
hard_items_1of5_scored = score_items(hard_items_1of5,
key = c(
"whispering",
"verbiage",
"fragrance",
"sagacious"
)) %>% as_tibble()
hard_items_2of5_scored = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_2of5[, unlist(idx)]
i_cols_NA = i_cols
i_cols_NA[] = !is.na(i_cols_NA)
#score as correct using options 1+2
(i_cols_NA[, 1] & i_cols_NA[, 2]) %>% as.vector() %>% as.numeric()
})
hard_items_3of5_scored = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_3of5[, unlist(idx)]
i_cols_NA = i_cols
i_cols_NA[] = !is.na(i_cols_NA)
#score as correct using options 1+2+3
(i_cols_NA[, 1] & i_cols_NA[, 2] & i_cols_NA[, 3]) %>% as.vector() %>% as.numeric()
})
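Both block scorers count an item correct when all keyed options are among the selections, which implicitly assumes the survey forced exactly k picks; extra wrong selections alongside the right ones would not be penalized. A stricter hedged variant that also requires the remaining options to be blank:
#stricter scorer: correct only if the first k options and no others were selected
score_pick_k = function(block, k) {
  sel = !is.na(block) #selection indicator matrix
  keyed = rowSums(sel[, 1:k, drop = FALSE]) == k
  extras = rowSums(sel[, -(1:k), drop = FALSE]) > 0
  as.numeric(keyed & !extras)
}
#e.g. score_pick_k(hard_items_2of5[, 1:5], k = 2)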
#combine
hard_items_scored = bind_cols(
hard_items_1of5_scored %>% set_names("1of5_" + seq_along(hard_items_1of5_scored)),
hard_items_2of5_scored %>% set_names("2of5_" + seq_along(hard_items_2of5_scored)),
hard_items_3of5_scored %>% set_names("3of5_" + seq_along(hard_items_3of5_scored))
)
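The "1of5_" + seq_along(...) naming works because kirkegaard masks base + with a string-concatenating version (see the startup messages above); it behaves like a vectorized str_c():
"1of5_" + 1:3 #-> "1of5_1" "1of5_2" "1of5_3"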
#item stats
hard_items_scored_stats = itemstats(hard_items_scored)
#IRT fit
hard_items_fit = cache_object(filename = "data/hard_items_fit.rds", expr = mirt(
hard_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000),
),
renew = renew_all)
## Cache found, reading object from disk
hard_items_fit
##
## Call:
## mirt(data = hard_items_scored, model = 1, itemtype = "2PL", technical = list(NCYCLES = 5000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 55 EM iterations.
## mirt version: 1.41.8
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -16226
## Estimated parameters: 146
## AIC = 32744
## BIC = 33339; SABIC = 32875
## G2 (1e+10) = 27181, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
hard_items_fit %>% summary()
## F1 h2
## 1of5_1 0.400 0.16006
## 1of5_2 0.272 0.07408
## 1of5_3 0.293 0.08584
## 1of5_4 0.308 0.09464
## 2of5_1 0.521 0.27105
## 2of5_2 0.582 0.33837
## 2of5_3 0.620 0.38423
## 2of5_4 0.640 0.40957
## 2of5_5 0.546 0.29805
## 2of5_6 0.771 0.59518
## 2of5_7 0.383 0.14651
## 2of5_8 0.629 0.39582
## 2of5_9 0.617 0.38059
## 2of5_10 0.351 0.12339
## 2of5_11 0.397 0.15783
## 2of5_12 0.462 0.21379
## 2of5_13 0.509 0.25893
## 2of5_14 0.350 0.12239
## 2of5_15 0.715 0.51183
## 2of5_16 0.586 0.34328
## 2of5_17 0.731 0.53427
## 2of5_18 0.761 0.57919
## 2of5_19 0.533 0.28458
## 2of5_20 0.385 0.14789
## 2of5_21 0.561 0.31423
## 2of5_22 0.901 0.81262
## 2of5_23 0.372 0.13853
## 2of5_24 0.285 0.08135
## 2of5_25 0.743 0.55173
## 2of5_26 0.743 0.55217
## 2of5_27 0.617 0.38078
## 2of5_28 0.348 0.12118
## 2of5_29 0.607 0.36849
## 2of5_30 0.637 0.40591
## 2of5_31 0.593 0.35144
## 2of5_32 0.583 0.33980
## 2of5_33 0.365 0.13304
## 2of5_34 0.491 0.24127
## 2of5_35 0.346 0.12001
## 2of5_36 0.879 0.77203
## 2of5_37 0.992 0.98379
## 3of5_1 0.247 0.06079
## 3of5_2 0.513 0.26333
## 3of5_3 0.529 0.28004
## 3of5_4 0.736 0.54120
## 3of5_5 0.391 0.15255
## 3of5_6 0.739 0.54626
## 3of5_7 0.607 0.36903
## 3of5_8 0.100 0.01008
## 3of5_9 0.253 0.06387
## 3of5_10 0.799 0.63823
## 3of5_11 0.725 0.52514
## 3of5_12 0.587 0.34506
## 3of5_13 0.318 0.10106
## 3of5_14 0.524 0.27491
## 3of5_15 -0.038 0.00145
## 3of5_16 0.407 0.16550
## 3of5_17 0.494 0.24440
## 3of5_18 0.380 0.14436
## 3of5_19 -0.336 0.11263
## 3of5_20 0.569 0.32320
## 3of5_21 0.420 0.17622
## 3of5_22 0.498 0.24759
## 3of5_23 0.811 0.65806
## 3of5_24 0.609 0.37113
## 3of5_25 0.571 0.32581
## 3of5_26 0.728 0.52989
## 3of5_27 0.589 0.34742
## 3of5_28 0.409 0.16703
## 3of5_29 0.571 0.32574
## 3of5_30 0.590 0.34832
## 3of5_31 0.457 0.20895
## 3of5_32 0.637 0.40528
##
## SS loadings: 22.9
## Proportion Var: 0.313
##
## Factor correlations:
##
## F1
## F1 1
hard_items_scored_stats$itemstats$g_loading = hard_items_fit %>% summary(verbose = F) %>% .$rotF %>% as.vector()
hard_items_scored_stats$itemstats
hard_items_scored_stats$itemstats %>% describe2()
#difficulties
hard_items_scored_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
#loadings
hard_items_scored_stats$itemstats$g_loading %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
plot(hard_items_fit, type = "rxx")
hard_items_fit_scores = fscores(hard_items_fit, full.scores.SE = T)
empirical_rxx(hard_items_fit_scores)
## F1
## 0.942
#time spent vs. score
# hard_items_scored$time_spent_mins = (hard_items$`Date Submitted`-hard_items$`Time Started`)/60
#exact choices table
hard_items_2of5_choices = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_2of5[, unlist(idx)]
#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick2of5_" + 1:ncol(hard_items_2of5_scored))
hard_items_3of5_choices = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_3of5[, unlist(idx)]
#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick3of5_" + 1:ncol(hard_items_3of5_scored))
Easy and hard items together
#merge data
all_items_scored = left_join(
easy_items_scored %>% mutate(id = d$Participant_id),
hard_items_scored %>% mutate(id = hard_items$`Write in your Prolific ID:`),
by = "id"
)
#no dups
assert_that(!anyDuplicated(all_items_scored$id))
## [1] TRUE
#fit all dataset to same model
#takes a while to converge at default settings
#not even after 20k iter
all_items_fit = cache_object(filename = "data/all_items_fit.rds", expr = mirt(
all_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
#item stats
all_items_stats = itemstats(all_items_scored %>% select(-id))
all_items_fit
##
## Call:
## mirt(data = all_items_scored %>% select(-id), model = 1, itemtype = "2PL",
## technical = list(NCYCLES = 2000))
##
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 2000 EM iterations.
## mirt version: 1.41.8
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -44440
## Estimated parameters: 464
## AIC = 89809
## BIC = 91764; SABIC = 90291
all_items_fit %>% summary()
## F1 h2
## a_specific_number 0.5402 0.29178
## a_type_of_brass_instrument 0.8740 0.76391
## a_type_of_drapery 0.7115 0.50621
## a_type_of_fabric_101 0.7985 0.63765
## a_type_of_fabric_98 0.6737 0.45382
## a_type_of_grasshopper 0.7928 0.62846
## a_type_of_hat 0.5003 0.25035
## a_type_of_heating_unit 0.6137 0.37662
## a_type_of_magistrate_position 0.8120 0.65930
## a_type_of_mathematical_operation 0.6522 0.42535
## a_type_of_overshoe 0.4724 0.22316
## a_type_of_plant 0.7854 0.61691
## a_type_of_religious_teacher 0.7917 0.62682
## a_type_of_rock 0.5047 0.25471
## a_type_of_skirts 0.6916 0.47825
## a_type_of_smokeless_powder 0.5355 0.28671
## a_type_of_sound 0.5012 0.25118
## a_type_of_sword 0.3829 0.14664
## a_type_of_volcanic_crater 0.6813 0.46415
## a_type_of_wavy_form 0.7212 0.52011
## actionable_negligence 0.8212 0.67437
## advisory 0.8008 0.64122
## advocate 0.6281 0.39447
## almighty 0.9206 0.84750
## amazement 0.8094 0.65521
## amenability 0.8035 0.64558
## aroma 0.9202 0.84675
## auspices 0.6018 0.36214
## avoid 0.9104 0.82890
## barely_able_to_read_and_write 0.8564 0.73340
## because 0.7820 0.61145
## berate 0.7881 0.62109
## blade 0.7893 0.62293
## blend 0.7344 0.53932
## blunder 0.8867 0.78616
## bow 0.7573 0.57349
## brief 0.4498 0.20231
## bring_about 0.8233 0.67781
## carelessly_or_hastily_put_together 0.7152 0.51158
## celebration 0.7254 0.52621
## cheat 0.7953 0.63250
## cheerful 0.9771 0.95480
## clay_pigeon_shooting 0.8116 0.65874
## coarse 0.8335 0.69470
## collect_or_study_insects 0.7704 0.59350
## colossal 0.7277 0.52952
## commotion 0.8922 0.79605
## complainer 0.8110 0.65775
## confiscate 0.8433 0.71114
## congratulate 0.5825 0.33929
## construct 0.9947 0.98949
## contemplate 0.6225 0.38754
## convoy 0.8539 0.72909
## couch 0.6832 0.46681
## crease 0.7893 0.62293
## cunning 0.6950 0.48299
## deceit 0.5559 0.30903
## deliberately 0.7557 0.57101
## deprive 0.6533 0.42684
## detectable 0.7698 0.59256
## diatribes 0.6130 0.37573
## disjoined 0.3336 0.11127
## disrupt 0.8202 0.67280
## distinct 0.3515 0.12352
## divergence 0.7757 0.60164
## dome 0.5769 0.33286
## downfall 0.8049 0.64778
## drink 0.8212 0.67442
## elite 0.7477 0.55905
## embarrassment 0.6113 0.37369
## emphasize 0.8872 0.78708
## empire 0.5812 0.33784
## empty 0.7691 0.59158
## entanglement 0.7057 0.49800
## environment 0.5656 0.31987
## evil 0.7274 0.52906
## excite 0.7943 0.63090
## farewell 0.9395 0.88260
## flammable 0.9257 0.85696
## flatterer 0.8856 0.78428
## flatteries 0.5853 0.34253
## forever 0.8159 0.66569
## frequent 0.6202 0.38469
## gigantic 0.8540 0.72929
## girl 0.7739 0.59899
## goo 0.8689 0.75490
## goodbye 0.8958 0.80254
## greed 0.8170 0.66756
## groan 0.8422 0.70933
## gruesome 0.7053 0.49747
## guarantee 0.5134 0.26359
## gutter 0.8215 0.67486
## harmfulness 0.5994 0.35932
## hobby 0.7710 0.59445
## hut 0.8505 0.72337
## illness 0.5155 0.26575
## impromptu 0.6212 0.38583
## indescribable 0.8333 0.69436
## intellectual 0.7575 0.57379
## jargon 0.9500 0.90248
## knowledgeable 0.8314 0.69129
## lackadaisical 0.5579 0.31121
## manager 0.9059 0.82058
## meal 0.7375 0.54386
## melodic 0.7332 0.53752
## mutually 0.8811 0.77631
## nonsense 0.8746 0.76497
## nonsensical 0.9169 0.84071
## not_coveted 0.7009 0.49121
## pamper 0.5639 0.31793
## penitentiary 0.7132 0.50866
## perplexing 0.7308 0.53408
## persistence 0.7378 0.54431
## predetermine 0.6998 0.48978
## pretender 0.9175 0.84176
## questioning 0.7481 0.55965
## quickly 0.6500 0.42249
## rebellious 0.8064 0.65025
## referee 0.7960 0.63365
## referendum 0.4748 0.22543
## relating_to_the_right 0.4511 0.20346
## relevant 0.7349 0.54010
## remove 0.9966 0.99326
## respectful 0.7963 0.63411
## retailer 0.6267 0.39270
## retract 0.6419 0.41201
## ropes 0.7853 0.61673
## sag 0.6624 0.43880
## schemer 0.6338 0.40171
## seize 0.7799 0.60830
## sensitivity 0.7009 0.49122
## shadows 0.6090 0.37084
## silly 0.9573 0.91638
## sketch 0.6401 0.40976
## slang 0.8112 0.65800
## slave 0.4300 0.18489
## sluggish 0.6997 0.48957
## somber 0.5596 0.31316
## spinelessness 0.6696 0.44839
## sporadic 0.8349 0.69700
## squad 0.9409 0.88536
## stagger 0.7265 0.52779
## stinking 0.8940 0.79922
## stroll 0.8496 0.72189
## stubborn_100 0.6696 0.44831
## stubborn_143 0.7074 0.50042
## stylish 0.6282 0.39464
## summit 0.7413 0.54959
## terminology 0.8137 0.66218
## the_science_of_speech_sounds 0.8670 0.75171
## transportation 0.3848 0.14807
## tyrant 0.4110 0.16891
## unhealthful 0.4015 0.16124
## vile 0.8990 0.80818
## vulgar 0.7444 0.55412
## wandering 0.4161 0.17310
## warning 0.7491 0.56109
## wave 0.7362 0.54193
## weaponry 0.6727 0.45252
## 1of5_1 0.3457 0.11952
## 1of5_2 0.2166 0.04691
## 1of5_3 0.2691 0.07241
## 1of5_4 0.2739 0.07503
## 2of5_1 0.4395 0.19317
## 2of5_2 0.5234 0.27396
## 2of5_3 0.5978 0.35733
## 2of5_4 0.5995 0.35935
## 2of5_5 0.5419 0.29369
## 2of5_6 0.7464 0.55704
## 2of5_7 0.3704 0.13720
## 2of5_8 0.5831 0.34001
## 2of5_9 0.5813 0.33793
## 2of5_10 0.3447 0.11882
## 2of5_11 0.3982 0.15858
## 2of5_12 0.4071 0.16569
## 2of5_13 0.4935 0.24352
## 2of5_14 0.3212 0.10317
## 2of5_15 0.6787 0.46059
## 2of5_16 0.5334 0.28453
## 2of5_17 0.6974 0.48635
## 2of5_18 0.7224 0.52183
## 2of5_19 0.4725 0.22321
## 2of5_20 0.3381 0.11429
## 2of5_21 0.5263 0.27701
## 2of5_22 0.8467 0.71692
## 2of5_23 0.3233 0.10450
## 2of5_24 0.2244 0.05037
## 2of5_25 0.7314 0.53496
## 2of5_26 0.7083 0.50163
## 2of5_27 0.5498 0.30230
## 2of5_28 0.3321 0.11026
## 2of5_29 0.5641 0.31823
## 2of5_30 0.5882 0.34600
## 2of5_31 0.5485 0.30083
## 2of5_32 0.5008 0.25076
## 2of5_33 0.3161 0.09993
## 2of5_34 0.4379 0.19176
## 2of5_35 0.3107 0.09655
## 2of5_36 0.8086 0.65389
## 2of5_37 0.8983 0.80698
## 3of5_1 0.2613 0.06825
## 3of5_2 0.4582 0.20990
## 3of5_3 0.4801 0.23052
## 3of5_4 0.6882 0.47361
## 3of5_5 0.3766 0.14179
## 3of5_6 0.7321 0.53603
## 3of5_7 0.5657 0.32005
## 3of5_8 0.0823 0.00677
## 3of5_9 0.2377 0.05652
## 3of5_10 0.8191 0.67092
## 3of5_11 0.6980 0.48725
## 3of5_12 0.5524 0.30516
## 3of5_13 0.3193 0.10198
## 3of5_14 0.5119 0.26200
## 3of5_15 -0.0141 0.00020
## 3of5_16 0.3939 0.15517
## 3of5_17 0.4288 0.18389
## 3of5_18 0.3632 0.13191
## 3of5_19 -0.2751 0.07570
## 3of5_20 0.5381 0.28951
## 3of5_21 0.4473 0.20005
## 3of5_22 0.4683 0.21933
## 3of5_23 0.8638 0.74621
## 3of5_24 0.5869 0.34448
## 3of5_25 0.5332 0.28427
## 3of5_26 0.6875 0.47259
## 3of5_27 0.5614 0.31522
## 3of5_28 0.4422 0.19556
## 3of5_29 0.5839 0.34089
## 3of5_30 0.5036 0.25362
## 3of5_31 0.4652 0.21639
## 3of5_32 0.6161 0.37953
##
## SS loadings: 108
## Proportion Var: 0.463
##
## Factor correlations:
##
## F1
## F1 1
all_items_stats$itemstats$g_loading = all_items_fit %>% summary(verbose = F) %>% .$rotF %>% as.vector()
all_items_stats$itemstats$discrim = coef(all_items_fit, simplify = T)$items[, 1] %>% unname()
all_items_stats$itemstats$difficulty = -coef(all_items_fit, simplify = T)$items[, 2] %>% unname()
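Note that -d is the negated easiness intercept rather than the conventional IRT difficulty; on the ability scale that would be b = -d/a1. A hedged sketch if θ-unit difficulties are wanted (difficulty_b is an illustrative, new column name):
#conventional difficulty in theta units: b = -d / a1 (illustrative)
all_items_coefs = coef(all_items_fit, simplify = T)$items
all_items_stats$itemstats$difficulty_b = unname(-all_items_coefs[, "d"] / all_items_coefs[, "a1"])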
all_items_stats$itemstats
all_items_stats$itemstats %>%
describe2()
#mean rates by wave
all_items_stats$itemstats %>%
filter(N == 499) %>%
describe2()
all_items_stats$itemstats %>%
filter(N < 499) %>%
describe2()
#difficulties
all_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/all items pass rate.png")
#loadings
all_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/all items factor loading.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
plot(all_items_fit, type = "rxx")
rxx_info = plot(all_items_fit, type = "rxx")
all_items_fit_scores = fscores(all_items_fit, full.scores.SE = T)
empirical_rxx(all_items_fit_scores)
## F1
## 0.977
#which range has >.90?
d_rxx_info = tibble(
z = rxx_info$panel.args[[1]]$x,
rel = rxx_info$panel.args[[1]]$y
)
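Scraping panel.args from the lattice object works, but with a standard-normal latent trait the same curve follows directly from the test information function, since conditional reliability is I(θ)/(I(θ) + 1). A hedged alternative:
#conditional reliability straight from test information
theta_grid = matrix(seq(-6, 6, length.out = 200))
test_info = testinfo(all_items_fit, Theta = theta_grid)
d_rxx_direct = tibble(z = theta_grid[, 1], rel = test_info / (test_info + 1))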
d_rxx_info %>%
filter(rel > .90) %>%
describe2()
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))
GG_save("figs/reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
#difficulty and g-loading
all_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = all_items_scored$id,
g = all_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#restandardize g for age, white subset
d2$g_ageadj = resid(ols(g ~ rcs(age), data = d2)) %>% unname()
## number of knots in rcs defaulting to 5
d2$g_ageadj_z = standardize(d2$g_ageadj, focal_group = d2$white_only)
Drop a few bad items
all_items_stats$itemstats %>% filter(g_loading < .25)
good_items_scored = all_items_scored %>% select(-!!(all_items_stats$itemstats %>% filter(g_loading < .25) %>% rownames()))
#refit
good_items_fit = cache_object(filename = "good_items_fit.rds", expr = mirt(
good_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
good_items_fit
##
## Call:
## mirt(data = good_items_scored %>% select(-id), model = 1, itemtype = "2PL",
## technical = list(NCYCLES = 2000))
##
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 2000 EM iterations.
## mirt version: 1.41.8
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -43177
## Estimated parameters: 452
## AIC = 87258
## BIC = 89162; SABIC = 87727
good_items_fit %>% summary()
## F1 h2
## a_specific_number 0.541 0.2927
## a_type_of_brass_instrument 0.875 0.7648
## a_type_of_drapery 0.713 0.5084
## a_type_of_fabric_101 0.799 0.6387
## a_type_of_fabric_98 0.675 0.4552
## a_type_of_grasshopper 0.793 0.6295
## a_type_of_hat 0.501 0.2513
## a_type_of_heating_unit 0.614 0.3776
## a_type_of_magistrate_position 0.813 0.6616
## a_type_of_mathematical_operation 0.653 0.4260
## a_type_of_overshoe 0.472 0.2231
## a_type_of_plant 0.786 0.6174
## a_type_of_religious_teacher 0.792 0.6273
## a_type_of_rock 0.507 0.2572
## a_type_of_skirts 0.692 0.4794
## a_type_of_smokeless_powder 0.536 0.2872
## a_type_of_sound 0.502 0.2522
## a_type_of_sword 0.383 0.1468
## a_type_of_volcanic_crater 0.682 0.4645
## a_type_of_wavy_form 0.722 0.5206
## actionable_negligence 0.822 0.6758
## advisory 0.801 0.6413
## advocate 0.629 0.3956
## almighty 0.921 0.8475
## amazement 0.810 0.6559
## amenability 0.804 0.6472
## aroma 0.921 0.8476
## auspices 0.602 0.3629
## avoid 0.910 0.8282
## barely_able_to_read_and_write 0.857 0.7346
## because 0.782 0.6123
## berate 0.789 0.6218
## blade 0.790 0.6238
## blend 0.735 0.5408
## blunder 0.887 0.7871
## bow 0.758 0.5745
## brief 0.449 0.2020
## bring_about 0.824 0.6795
## carelessly_or_hastily_put_together 0.716 0.5128
## celebration 0.726 0.5265
## cheat 0.796 0.6339
## cheerful 0.977 0.9545
## clay_pigeon_shooting 0.812 0.6600
## coarse 0.834 0.6953
## collect_or_study_insects 0.771 0.5946
## colossal 0.728 0.5306
## commotion 0.893 0.7968
## complainer 0.812 0.6589
## confiscate 0.844 0.7121
## congratulate 0.583 0.3397
## construct 0.994 0.9882
## contemplate 0.622 0.3872
## convoy 0.855 0.7303
## couch 0.684 0.4681
## crease 0.789 0.6232
## cunning 0.696 0.4842
## deceit 0.556 0.3096
## deliberately 0.756 0.5719
## deprive 0.654 0.4276
## detectable 0.770 0.5931
## diatribes 0.613 0.3763
## disjoined 0.334 0.1113
## disrupt 0.820 0.6729
## distinct 0.351 0.1231
## divergence 0.776 0.6023
## dome 0.578 0.3336
## downfall 0.805 0.6486
## drink 0.822 0.6751
## elite 0.748 0.5599
## embarrassment 0.612 0.3745
## emphasize 0.888 0.7878
## empire 0.582 0.3386
## empty 0.770 0.5923
## entanglement 0.706 0.4988
## environment 0.566 0.3209
## evil 0.728 0.5304
## excite 0.795 0.6316
## farewell 0.940 0.8831
## flammable 0.926 0.8572
## flatterer 0.886 0.7849
## flatteries 0.586 0.3435
## forever 0.817 0.6668
## frequent 0.621 0.3855
## gigantic 0.854 0.7298
## girl 0.774 0.5987
## goo 0.870 0.7563
## goodbye 0.896 0.8033
## greed 0.818 0.6683
## groan 0.843 0.7102
## gruesome 0.707 0.4992
## guarantee 0.514 0.2646
## gutter 0.822 0.6761
## harmfulness 0.600 0.3598
## hobby 0.772 0.5957
## hut 0.851 0.7240
## illness 0.516 0.2661
## impromptu 0.621 0.3861
## indescribable 0.833 0.6947
## intellectual 0.758 0.5747
## jargon 0.950 0.9027
## knowledgeable 0.832 0.6925
## lackadaisical 0.559 0.3125
## manager 0.906 0.8204
## meal 0.739 0.5455
## melodic 0.734 0.5381
## mutually 0.881 0.7770
## nonsense 0.875 0.7661
## nonsensical 0.917 0.8409
## not_coveted 0.701 0.4919
## pamper 0.564 0.3186
## penitentiary 0.714 0.5102
## perplexing 0.731 0.5348
## persistence 0.739 0.5461
## predetermine 0.700 0.4899
## pretender 0.918 0.8425
## questioning 0.749 0.5607
## quickly 0.651 0.4233
## rebellious 0.807 0.6512
## referee 0.796 0.6344
## referendum 0.475 0.2257
## relating_to_the_right 0.452 0.2045
## relevant 0.736 0.5414
## remove 0.996 0.9926
## respectful 0.796 0.6343
## retailer 0.627 0.3936
## retract 0.643 0.4136
## ropes 0.786 0.6173
## sag 0.662 0.4389
## schemer 0.634 0.4023
## seize 0.780 0.6091
## sensitivity 0.701 0.4918
## shadows 0.609 0.3705
## silly 0.957 0.9163
## sketch 0.641 0.4106
## slang 0.812 0.6589
## slave 0.430 0.1850
## sluggish 0.699 0.4891
## somber 0.560 0.3133
## spinelessness 0.670 0.4493
## sporadic 0.835 0.6980
## squad 0.942 0.8866
## stagger 0.727 0.5287
## stinking 0.894 0.7999
## stroll 0.850 0.7231
## stubborn_100 0.670 0.4492
## stubborn_143 0.708 0.5008
## stylish 0.630 0.3963
## summit 0.742 0.5501
## terminology 0.814 0.6630
## the_science_of_speech_sounds 0.867 0.7522
## transportation 0.385 0.1483
## tyrant 0.411 0.1689
## unhealthful 0.402 0.1619
## vile 0.899 0.8088
## vulgar 0.745 0.5550
## wandering 0.416 0.1733
## warning 0.749 0.5614
## wave 0.737 0.5433
## weaponry 0.673 0.4531
## 1of5_1 0.346 0.1195
## 1of5_3 0.270 0.0729
## 1of5_4 0.274 0.0753
## 2of5_1 0.439 0.1927
## 2of5_2 0.523 0.2737
## 2of5_3 0.598 0.3575
## 2of5_4 0.600 0.3599
## 2of5_5 0.543 0.2948
## 2of5_6 0.747 0.5579
## 2of5_7 0.371 0.1373
## 2of5_8 0.583 0.3399
## 2of5_9 0.582 0.3384
## 2of5_10 0.345 0.1190
## 2of5_11 0.398 0.1585
## 2of5_12 0.407 0.1653
## 2of5_13 0.494 0.2436
## 2of5_14 0.320 0.1024
## 2of5_15 0.679 0.4611
## 2of5_16 0.534 0.2849
## 2of5_17 0.697 0.4863
## 2of5_18 0.722 0.5215
## 2of5_19 0.474 0.2242
## 2of5_20 0.338 0.1141
## 2of5_21 0.526 0.2766
## 2of5_22 0.847 0.7172
## 2of5_23 0.322 0.1035
## 2of5_25 0.732 0.5352
## 2of5_26 0.708 0.5014
## 2of5_27 0.551 0.3037
## 2of5_28 0.332 0.1100
## 2of5_29 0.565 0.3188
## 2of5_30 0.589 0.3466
## 2of5_31 0.549 0.3013
## 2of5_32 0.500 0.2504
## 2of5_33 0.317 0.1002
## 2of5_34 0.438 0.1921
## 2of5_35 0.311 0.0970
## 2of5_36 0.809 0.6546
## 2of5_37 0.898 0.8067
## 3of5_1 0.261 0.0680
## 3of5_2 0.458 0.2098
## 3of5_3 0.481 0.2309
## 3of5_4 0.688 0.4729
## 3of5_5 0.376 0.1413
## 3of5_6 0.733 0.5374
## 3of5_7 0.567 0.3212
## 3of5_10 0.820 0.6725
## 3of5_11 0.698 0.4877
## 3of5_12 0.553 0.3054
## 3of5_13 0.321 0.1028
## 3of5_14 0.512 0.2624
## 3of5_16 0.395 0.1562
## 3of5_17 0.430 0.1845
## 3of5_18 0.364 0.1323
## 3of5_20 0.538 0.2891
## 3of5_21 0.449 0.2016
## 3of5_22 0.467 0.2184
## 3of5_23 0.864 0.7471
## 3of5_24 0.587 0.3443
## 3of5_25 0.534 0.2848
## 3of5_26 0.687 0.4724
## 3of5_27 0.562 0.3154
## 3of5_28 0.442 0.1957
## 3of5_29 0.585 0.3422
## 3of5_30 0.504 0.2537
## 3of5_31 0.465 0.2166
## 3of5_32 0.617 0.3808
##
## SS loadings: 107
## Proportion Var: 0.475
##
## Factor correlations:
##
## F1
## F1 1
good_items_stats = itemstats(good_items_scored %>% select(-id))
#extract g loadings (verbose = F avoids re-printing the full loading table)
good_items_stats$itemstats$g_loading = good_items_fit %>% summary(verbose = F) %>% .$rotF %>% as.vector()
good_items_stats$itemstats$discrim = coef(good_items_fit, simplify = T)$items[, 1] %>% unname()
good_items_stats$itemstats$difficulty = -coef(good_items_fit, simplify = T)$items[, 2] %>% unname()
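#note: in mirt's 2PL, P(theta) = 1 / (1 + exp(-(a*theta + d))), so the value
#stored above is the negative intercept -d; the classical difficulty is b = -d/a.
#a minimal sketch of the conversion (b_classical is not used further):
item_pars = coef(good_items_fit, simplify = T)$items
b_classical = -item_pars[, "d"] / item_pars[, "a1"]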
good_items_stats$itemstats
good_items_stats$itemstats %>%
describe2()
#difficulties (pass rates)
good_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items pass rate.png")
#loadings
good_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items factor loading.png")
#reliability
good_items_fit_scores = fscores(good_items_fit, full.scores.SE = T)
empirical_rxx(good_items_fit_scores)
## F1
## 0.977
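#the same number by hand, assuming empirical_rxx() uses the usual definition
#rxx = var(theta_hat) / (var(theta_hat) + mean(SE^2)):
theta_hat = good_items_fit_scores[, "F1"]
theta_se = good_items_fit_scores[, "SE_F1"]
var(theta_hat) / (var(theta_hat) + mean(theta_se^2))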
#which ability range has reliability > .90?
d_rxx_info = get_reliabilities(good_items_fit)
d_rxx_info %>%
filter(rel > .90) %>%
describe2()
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))
GG_save("figs/good items reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
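#get_reliabilities() presumably derives conditional reliability from the test
#information function via the standard identity rel(z) = I(z) / (I(z) + 1);
#a sketch of that computation directly with mirt:
theta_grid = matrix(seq(-4, 4, length.out = 161))
test_info = testinfo(good_items_fit, Theta = theta_grid)
d_rxx_check = tibble(z = theta_grid[, 1], rel = test_info / (test_info + 1))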
#difficulty and g-loading
good_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/good items scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = good_items_scored$id,
g = good_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#the age problem: raw scores rise with age, so norms must adjust for it
GG_scatter(d2, "age", "g") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#scores by age group
d2$age_group = discretize(d2$age, 3)
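#a rough dplyr equivalent, assuming discretize() makes equal-count bins (the
#labels here are hypothetical; discretize() derives its own interval labels):
d2$age_group_alt = ntile(d2$age, 3) %>% factor(labels = c("low", "mid", "high"))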
GG_group_means(d2, "g", "age_group", type = "boxplot") +
scale_y_continuous("Vocabulary IRT score") +
scale_x_discrete("Age group")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items boxplots by age.png")
describe2(d2$g, d2$age_group)
## New names:
## • `` -> `...1`
#do whites differ in age?
describe2(d2$age, d2$white_only)
## New names:
## • `` -> `...1`
#white subset is easier to work with
d2_white = d2 %>% filter(white_only)
(white_ageadj_model = lm(g ~ age, data = d2_white))
##
## Call:
## lm(formula = g ~ age, data = d2_white)
##
## Coefficients:
## (Intercept) age
## -1.1365 0.0264
#step 1: residualize out the age effect on the mean
d2_white$g_ageadj1 = resid(white_ageadj_model)
(ageadj_desc_whites = describe2(d2_white$g_ageadj1))
#step 2: model the absolute residuals directly to estimate the age trend in the SD
(absSD_ols_whites = lm(abs(g_ageadj1) ~ age, data = d2_white))
##
## Call:
## lm(formula = abs(g_ageadj1) ~ age, data = d2_white)
##
## Coefficients:
## (Intercept) age
## 0.48643 0.00535
#get age mean and SD adjusted scores
d2_white$g_ageadj2 = d2_white$g_ageadj1 / predict(absSD_ols_whites)
#check that the adjustment actually removed the age trend
d2_white %>%
GG_scatter("age", "g_ageadj2")
## `geom_smooth()` using formula = 'y ~ x'
test_HS(d2_white$g_ageadj2, d2_white$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
#re-standardize to white z-score norms
(white_desc_ageadj2_desc = describe2(d2_white$g_ageadj2))
d2_white$g_ageadj3 = (d2_white$g_ageadj2 - white_desc_ageadj2_desc$mean) / white_desc_ageadj2_desc$sd
d2_white$g_ageadj3 %>% describe2()
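#the three adjustment steps above wrapped as one helper; a sketch, assuming
#no missing values in score or age (lm() would otherwise misalign rows):
age_adjust = function(score, age) {
  r1 = resid(lm(score ~ age)) #mean adjustment
  r2 = r1 / predict(lm(abs(r1) ~ age)) #variance adjustment
  (r2 - mean(r2)) / sd(r2) #re-standardize
}
#age_adjust(d2_white$g, d2_white$age) should reproduce g_ageadj3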
#finally, make IQs with the make_norms() helper
vocab_norms = kirkegaard::make_norms(
score = d2$g,
age = d2$age,
norm_group = d2$race_combos_common == "white"
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
d2$IQ = vocab_norms$data$IQ
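#sanity check on the metric, assuming the usual convention IQ = 100 + 15*z
#standardized on the white norm group; this crude version skips the age
#adjustment, so expect a high but imperfect correlation with the real IQs
white_mask = d2$race_combos_common == "white"
z_crude = (d2$g - mean(d2$g[white_mask], na.rm = T)) / sd(d2$g[white_mask], na.rm = T)
cor(100 + 15 * z_crude, d2$IQ, use = "pair")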
#plot results to see if they make sense
d2 %>%
GG_denhist("IQ", "white_only")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/good items IQ scores by Whiteness.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
describe2(d2$IQ, d2$white_only)
## New names:
## • `` -> `...1`
#should be no age relationship within groups
d2 %>%
GG_scatter("age", "IQ")
## `geom_smooth()` using formula = 'y ~ x'
#and no heteroscedasticity
test_HS(d2$IQ, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
d2_mmpi = d2 %>% select(I_am_easily_awakened_by_noise:I_like_movie_love_scenes) %>%
map_df(~mapvalues(., from = c("Yes", "No"), to = c(1, 0))) %>%
map_df(as.numeric)
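#an equivalent one-step recode ("Yes" -> 1, "No" -> 0, NAs preserved), shown
#for reference; all.equal(d2_mmpi, d2_mmpi_alt) should be TRUE
d2_mmpi_alt = d2 %>%
  select(I_am_easily_awakened_by_noise:I_like_movie_love_scenes) %>%
  map_df(~as.numeric(. == "Yes"))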
#IQ means by response to each MMPI item
MMPI_IQ_means = map2_df(d2_mmpi, names(d2_mmpi), function(x, y) {
desc = suppressMessages(describe2(d2$IQ, group = x))
tibble(
question = d_vars %>% filter(var_name == y) %>% pull(label),
yes = desc$mean[2],
no = desc$mean[1],
IQ_gap = yes-no,
abs_IQ_gap = abs(IQ_gap)
)
})
GG_scatter(d2, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "IQ") +
scale_x_continuous("Answer to 'How many items on the test you just took do you think you got correct?'")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/self-estimate vs. IQ.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d2, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "IQ")
## `geom_smooth()` using formula = 'y ~ x'
paired.r(
cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, use = "pair"),
cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, use = "pair"),
n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 1]
)
## Call: paired.r(xy = cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct,
## use = "pair"), xz = cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did,
## use = "pair"), n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1,
## 1])
## [1] "test of difference between two independent correlations"
## z = 3.94 With probability = 0
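#both correlations involve the same respondents and share d2$IQ, so the
#dependent-correlations version of the test (supplying yz) is arguably more
#appropriate than the independent one above; a sketch:
self_count = d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct
self_rank = d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did
paired.r(
  xy = cor(d2$IQ, self_count, use = "pair"),
  xz = cor(d2$IQ, self_rank, use = "pair"),
  yz = cor(self_count, self_rank, use = "pair"),
  n = pairwiseCount(d2$IQ, self_rank)[1, 1]
)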
#sex bias
sex_bias_fit = cache_object(filename = "data/sex_bias_fit.rds", expr = DIF_test(
items = good_items_scored %>% select(-id),
model = 1,
group = d2$sex,
technical = list(NCYCLES = 2000)
))
## Cache found, reading object from disk
#DIF items
sex_bias_fit$DIF_stats %>% filter(p < .05)
sex_bias_fit$DIF_stats %>% filter(p_adj < .05)
sex_bias_fit$effect_size_test
## $liberal
## Effect Size Value
## 1 STDS 0.09705
## 2 UTDS 4.73322
## 3 UETSDS 0.51663
## 4 ETSSD 0.00256
## 5 Starks.DTFR 0.14094
## 6 UDTFR 4.60979
## 7 UETSDN 0.54145
## 8 theta.of.max.test.D -2.01111
## 9 Test.Dmax -2.00227
##
## $conservative
## Effect Size Value
## 1 STDS 0.15125
## 2 UTDS 0.97288
## 3 UETSDS 0.23760
## 4 ETSSD 0.00398
## 5 Starks.DTFR 0.17914
## 6 UDTFR 0.90075
## 7 UETSDN 0.24653
## 8 theta.of.max.test.D 0.51284
## 9 Test.Dmax 0.39618
sex_bias_fit$DIF_stats$item_number = seq_along_rows(sex_bias_fit$DIF_stats)
#plot item characteristic curves
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace")
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
save_plot_to_file({
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
}, filename = "figs/good items sex DIF.png")
sex_bias_fit$fits$anchor_liberal %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p < .05) %>% pull(item_number))
#plot expected test score by group (test-level bias)
sex_bias_fit$fits$anchor_conservative %>% plot(type = "score")
#sex difference
SMD_matrix(d2$IQ, d2$sex)
## Male Female
## Male NA -0.0684
## Female -0.0684 NA
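#SMD_matrix() presumably reports Cohen's d; a minimal check by hand, assuming
#the pooled-SD convention:
iq_m = d2$IQ[d2$sex == "Male"]
iq_f = d2$IQ[d2$sex == "Female"]
(mean(iq_f, na.rm = T) - mean(iq_m, na.rm = T)) / sqrt((var(iq_m, na.rm = T) + var(iq_f, na.rm = T)) / 2)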
describe2(d2$IQ, d2$sex)
## New names:
## • `` -> `...1`
var.test(IQ ~ sex, data = d2)
##
## F test to compare two variances
##
## data: IQ by sex
## F = 1, num df = 238, denom df = 259, p-value = 0.06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.995 1.638
## sample estimates:
## ratio of variances
## 1.28
#item level pass rate SD by sex (the zero-SD warnings below come from items answered uniformly within a sex)
good_items_stats_sexes = itemstats(
all_items_scored %>% select(-id),
group = d2$sex
)
## Warning in cor(data, use = "pairwise.complete.obs"): the standard deviation is
## zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation
## is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation
## is zero
bind_rows(
good_items_stats_sexes$Male$itemstats %>% mutate(sex = "Men"),
good_items_stats_sexes$Female$itemstats %>% mutate(sex = "Women")
) %>%
GG_denhist("sd", group = "sex")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
t.test(
good_items_stats_sexes$Male$itemstats$sd,
good_items_stats_sexes$Female$itemstats$sd
)
##
## Welch Two Sample t-test
##
## data: good_items_stats_sexes$Male$itemstats$sd and good_items_stats_sexes$Female$itemstats$sd
## t = 3, df = 441, p-value = 0.002
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.00992 0.04547
## sample estimates:
## mean of x mean of y
## 0.416 0.388
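#for binary items sd is roughly sqrt(p * (1 - p)), so the SD gap mainly tracks
#how close each sex's pass rates sit to 50%; the same test from pass rates:
p_m = good_items_stats_sexes$Male$itemstats$mean
p_f = good_items_stats_sexes$Female$itemstats$mean
t.test(sqrt(p_m * (1 - p_m)), sqrt(p_f * (1 - p_f)))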
#white subset
d2_white = d2 %>% filter(race_combos_common == "white")
SMD_matrix(d2_white$IQ, d2_white$sex)
## Male Female
## Male NA 0.039
## Female 0.039 NA
describe2(d2_white$IQ, d2_white$sex)
## New names:
## • `` -> `...1`
var.test(IQ ~ sex, data = d2_white)
##
## F test to compare two variances
##
## data: IQ by sex
## F = 1, num df = 171, denom df = 186, p-value = 0.1
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.957 1.725
## sample estimates:
## ratio of variances
## 1.28
d2 %>%
filter(race_combos_common == "white") %>%
GG_denhist("IQ", "sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
max_items = 50
max_cycles = 2000
plan(multisession(workers = 7))
#forward optimize for reliability
vocab_abbrev_forward = cache_object(filename = "data/vocab_abbrev_rc50.rds", expr = {
abbreviate_scale(
items = good_items_scored %>% select(-id),
item_target = max_items,
method = "forwards",
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
}, renew = renew_all)
## Cache found, reading object from disk
#max loading
#simple
max_loading_basic = abbreviate_scale(
items = good_items_scored %>% select(-id),
item_target = max_items,
method = "max_loading",
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## EM cycles terminated after 2000 iterations.
## 127.639 sec elapsed
#with difficulty balancing (5 difficulty groups)
max_loading_balanced = abbreviate_scale(
items = good_items_scored %>% select(-id),
item_target = max_items,
method = "max_loading",
difficulty_balance_groups = 5,
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## 99.324 sec elapsed
#residualization
max_loading_resid = abbreviate_scale(
items = good_items_scored %>% select(-id),
item_target = max_items,
method = "max_loading",
residualize_loadings = T,
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## 97.404 sec elapsed
#plot results
abbrev_results = bind_rows(
vocab_abbrev_forward$best_sets %>% mutate(method = "step forward"),
max_loading_basic$best_sets %>% mutate(method = "max loading, basic"),
max_loading_balanced$best_sets %>% mutate(method = "max loading, balanced"),
max_loading_resid$best_sets %>% mutate(method = "max loading, resid")
) %>%
select(reliability, r_full_score, method, items_in_scale, criterion_value) %>%
pivot_longer(
cols = c("reliability", "r_full_score", "criterion_value"),
names_to = "criterion",
values_to = "value"
) %>%
mutate(
criterion = case_when(
criterion == "reliability" ~ "Reliability",
criterion == "r_full_score" ~ "Cor. with full score",
criterion == "criterion_value" ~ "Combined index"
)
)
abbrev_results %>%
ggplot(aes(items_in_scale, value, color = method)) +
geom_line() +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .1)) +
facet_wrap("criterion")
GG_save("figs/abbreviation_comparison.png")
#make norms for abbreviated scales
abbrev_scales_items = c(10, 15, 20, 25, 30)
abbrev_scales_norms = map(abbrev_scales_items, function(item_count) {
make_norms(
score = vocab_abbrev_forward$best_sets %>% filter(items_in_scale == item_count) %>% extract2("scores") %>% extract2(1) %>% extract(, 1),
age = d2$age,
norm_group = d2$white_only,
p_value = .05
)
}) %>% set_names("scale_" + abbrev_scales_items)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.035). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.034). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = <0.001***). Model used.
#move scores into main dataset
for (scale in abbrev_scales_items) {
d2[["vocab_IQ_" + scale]] = abbrev_scales_norms[["scale_" + scale]]$data$IQ
}
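#quick check on how much information the short forms retain: correlation of
#each abbreviated IQ with the full-scale IQ
map_dbl(abbrev_scales_items, ~cor(d2$IQ, d2[["vocab_IQ_" + .x]], use = "pair")) %>%
  set_names("scale_" + abbrev_scales_items)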
#verify that age norming was done correctly by checking for age effects and white mean/SD
describe2(d2$vocab_IQ_30, d2$white_only)
## New names:
## • `` -> `...1`
#linear effect of age?
GG_scatter(d2, "age", "vocab_IQ_30")
## `geom_smooth()` using formula = 'y ~ x'
#and no heteroscedasticity
test_HS(d2$vocab_IQ_30, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
#versions
write_sessioninfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_DK.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_DK.UTF-8 LC_COLLATE=en_DK.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_DK.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] furrr_0.3.1 future_1.33.2 ggeffects_1.5.1
## [4] rms_6.8-0 googlesheets4_1.1.1 mirt_1.41.8
## [7] lattice_0.22-5 readxl_1.4.3 kirkegaard_2024-04-23
## [10] psych_2.4.3 assertthat_0.2.1 weights_1.0.4
## [13] Hmisc_5.1-2 magrittr_2.0.3 lubridate_1.9.3
## [16] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [19] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [22] tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.16.0 jsonlite_1.8.8 shape_1.4.6.1
## [4] TH.data_1.1-2 jomo_2.7-6 farver_2.1.1
## [7] nloptr_2.0.3 rmarkdown_2.26 ragg_1.3.0
## [10] fs_1.6.3 vctrs_0.6.5 minqa_1.2.6
## [13] base64enc_0.1-3 htmltools_0.5.8.1 polspline_1.1.24
## [16] tictoc_1.2.1 broom_1.0.5 cellranger_1.1.0
## [19] Formula_1.2-5 mitml_0.4-5 dcurver_0.9.2
## [22] sass_0.4.9 parallelly_1.37.1 bslib_0.7.0
## [25] htmlwidgets_1.6.4 plyr_1.8.9 sandwich_3.1-0
## [28] zoo_1.8-12 cachem_1.0.8 lifecycle_1.0.4
## [31] iterators_1.0.14 pkgconfig_2.0.3 Matrix_1.6-5
## [34] R6_2.5.1 fastmap_1.1.1 digest_0.6.35
## [37] colorspace_2.1-0 textshaping_0.3.7 vegan_2.6-4
## [40] labeling_0.4.3 fansi_1.0.6 timechange_0.3.0
## [43] gdata_3.0.0 mgcv_1.9-1 compiler_4.3.3
## [46] gargle_1.5.2 bit64_4.0.5 withr_3.0.0
## [49] htmlTable_2.4.2 backports_1.4.1 highr_0.10
## [52] pan_1.9 MASS_7.3-60 quantreg_5.97
## [55] GPArotation_2024.3-1 gtools_3.9.5 permute_0.9-7
## [58] tools_4.3.3 foreign_0.8-86 googledrive_2.1.1
## [61] nnet_7.3-19 glue_1.7.0 nlme_3.1-163
## [64] grid_4.3.3 checkmate_2.3.1 cluster_2.1.6
## [67] generics_0.1.3 gtable_0.3.4 tzdb_0.4.0
## [70] data.table_1.15.4 hms_1.1.3 Deriv_4.1.3
## [73] utf8_1.2.4 foreach_1.5.2 pillar_1.9.0
## [76] vroom_1.6.5 splines_4.3.3 survival_3.5-8
## [79] bit_4.0.5 SparseM_1.81 tidyselect_1.2.1
## [82] pbapply_1.7-2 knitr_1.45 gridExtra_2.3
## [85] xfun_0.43 stringi_1.8.3 yaml_2.3.8
## [88] boot_1.3-30 evaluate_0.23 codetools_0.2-19
## [91] cli_3.6.2 rpart_4.1.23 systemfonts_1.0.6
## [94] munsell_0.5.1 jquerylib_0.1.4 Rcpp_1.0.12
## [97] globals_0.16.3 parallel_4.3.3 MatrixModels_0.5-3
## [100] lme4_1.1-35.2 listenv_0.9.1 glmnet_4.1-8
## [103] mvtnorm_1.2-4 scales_1.3.0 insight_0.19.10
## [106] crayon_1.5.2 rlang_1.1.3 multcomp_1.4-25
## [109] mnormt_2.1.1 mice_3.16.0
#write main data to file for reuse
d2 %>% write_rds("data/main data.rds", compress = "xz")
#save scored items
good_items_scored %>% write_rds("data/item data.rds", compress = "xz")
#save norms
vocab_norms %>% write_rds("data/vocab norms.rds", compress = "xz")
abbrev_scales_norms %>% write_rds("data/vocab abbrev norms.rds", compress = "xz")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/6gcy4/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "vocab.Rmd", "vocab.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}