Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readxl,
  mirt,
  googlesheets4,
  rms,
  ggeffects,
  future, furrr
)
## Loading required package: stats4
## Loading required package: lattice
#black-and-white ggplot2 theme for every figure
theme_set(theme_bw())

#print numbers with 3 significant digits
options(
    digits = 3
)

#parallel cluster used by mirt
mirtCluster()

#future/furrr backend: 7 background R sessions
plan(multisession, workers = 7)

#delete cache and re-run all
#(set to TRUE to ignore cached objects and refit)
renew_all = FALSE

Functions

Data

#sensitive data
#one-off preparation: merge the Prolific and Alchemer exports and strip
#identifying columns. Disabled by default; change FALSE to TRUE to rebuild
#"data/raw data.rds" from the raw exports.
if (FALSE) {
  d_prolific = read_csv("data/prolific_export_64877cb0ca7d649ce538e74d.csv") %>% df_legalize_names()
  #keep only survey responses marked as complete
  d_alchemer = read_csv("data/20230618204902-SurveyExport.csv") %>% df_legalize_names() %>% filter(Status == "Complete")
  
  #each ID may occur at most once in either source, otherwise the join fans out
  assert_that(!any(duplicated(d_prolific$Participant_id)))
  assert_that(!any(duplicated(d_alchemer$Write_in_your_Prolific_ID)))
  #inspection aid: the result is not printed when run inside the braces,
  #run this line interactively to look at the IDs
  d_alchemer$Write_in_your_Prolific_ID %>% table2()
  
  #join on prolific id
  d = inner_join(
    d_prolific,
    d_alchemer,
    by = c("Participant_id" = "Write_in_your_Prolific_ID")
  )
  
  assert_that(!any(duplicated(d$Participant_id)))
  
  #drop identifying columns before saving
  #NOTE(review): Participant_id (the Prolific ID) is not dropped here -
  #confirm that keeping it in the shared file is intended
  d %>% 
    select(
      -Submission_id,
      -IP_Address,
      -Referer,
      -SessionID,
      -User_Agent
    ) %>% 
    write_rds("data/raw data.rds", compress = "xz")
}

#load prepared data without sensitive information
d = read_rds("data/raw data.rds")

#table
#overview table of the variables in d
d_vars = df_var_table(d)

Analysis

Recode

#native speaker flag: TRUE when the answer is exactly "Yes"
d$native_speaker = d$Is_English_one_of_your_native_languages == "Yes"
table2(d$native_speaker)
#recode covariates
#age: first run of digits found in the answer
d$age = d$How_old_are_you %>% str_extract("\\d+") %>% as.numeric()
#sex: shorten the labels, Male is the first (reference) level
d$sex = d$What_is_your_biological_sex %>%
  mapvalues(
    from = c("Male (Y chromosome)", "Female (no Y chromosome)"),
    to = c("Male", "Female")
  ) %>%
  factor(levels = c("Male", "Female"))

#ethnicity dummies
ethnicity_vars = d %>% select(White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with:Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with) %>% names()

#short dummy name -> prefix of the survey column it is derived from
#the order matters: later code selects the range white:other_race
#NOTE(review): "southest_asian" looks like a typo for "southeast_asian";
#the name is kept as is because other code may refer to it
ethnicity_question = "_Which_of_the_following_racial_ethnic_groups_do_you_identify_with"
ethnicity_prefixes = c(
  white = "White_European",
  jewish = "Jewish_Ashkenazi_Sephardic_Misrahi",
  black = "Black_African",
  hispanic = "Hispanic_Latino",
  middle_eastern = "Middle_Eastern_North_African",
  south_asian = "South_Asian_Indian_subcontinent_excluding_Pakistani_and_Bangladeshi",
  southest_asian = "Southeast_Asian_Bangladeshi_Vietnamese_Burmese_etc",
  east_asian = "East_Asian_Chinese_Korean_Japanese",
  polynesian = "Polynesian_Pacific_Islander",
  native_american = "Native_American_Amerindian",
  other_race = "Other"
)

#fail early with a clear message if a survey column is missing
stopifnot(all(paste0(ethnicity_prefixes, ethnicity_question) %in% names(d)))

#a group is coded TRUE when its survey column is non-missing
for (dummy_name in names(ethnicity_prefixes)) {
  source_col = paste0(ethnicity_prefixes[[dummy_name]], ethnicity_question)
  d[[dummy_name]] = factor(!is.na(d[[source_col]]))
}

ethnicity_simple_vars = d %>% select(white:other_race) %>% names() %>% factor()

#single-group flags: the group is selected and it is the only one selected
race_flags = d %>% select(white:other_race) %>% map_df(as.logical)
n_races_selected = rowSums(race_flags)
d$white_only = race_flags$white & n_races_selected == 1
d$black_only = race_flags$black & n_races_selected == 1

#combinations
d$race_combos = d %>% select(white:other_race) %>% encode_combinations()

d$race_combos %>% table2()
#common combos
#combinations with fewer than 9 cases are lumped together
d$race_combos_common = d$race_combos %>% fct_lump_min(min = 9)
d$race_combos_common %>% table2()
d$sex %>% table2()

Easy items

#item responses: every column from silly through cheat
easy_items = select(d, silly:cheat)

#answer key: four hand-entered items on top of the spreadsheet key,
#sorted by word
scoring_key = bind_rows(
  tibble(
    word = c("silly", "avoid", "remove", "construct"),
    correct = c("childish", "evade", "abolish", "create"),
    notes = NA
  ),
  read_excel(
    "data/answer keys for 155 items.xlsx",
    col_names = c("word", "correct", "notes")
  )
) %>%
  arrange(word)

#sort data colnames alphabetically
colnames_clean = str_clean(colnames(easy_items))
order(colnames_clean)
##   [1]  79  40  88  64  61 133 145  12  42  47 140 112 124   6  73  95 118 129
##  [19]  87 138  98  30 105  45  49  89   8 156   2  83  52 102  35 116  29 155
##  [37] 152 104  55 137 159   7  67  17  51  74  59 109  50  68   4  62 108  46
##  [55]  41 131  20 100 111  15 122 151  14 103 126 120  69 132  27  34  10  65
##  [73]  22 149 158   5  84  11  16  91 130  76  24  32 113  36  43 125  86  54
##  [91]  38  57  94  70  31 107 150  26  80  13  44 136  19 148  99  18  28  60
## [109]  77 142  58  85  78  56   9  48  37 115  71 157 117 153   3  75  96  90
## [127]  92 114 154  53 139 141   1 128  72 110 127 135 119 123  23 101  25  66
## [145]  63 106 144  97 121  21  81 147 146  33 134 143  39  93  82
#put the item columns into alphabetical order of their cleaned names
easy_items_sorted = easy_items[order(colnames_clean)]

#check matches
#key words next to data column names, for checking the alignment by eye
vocab_matches = tibble(
  scoring_key_word = scoring_key$word,
  scoring_key_correct = scoring_key$correct,
  data = colnames(easy_items_sorted)
)

#stubborn is duplicated, but fortunately in the right order
#NOTE(review): items and key are paired by position only; arrange() and
#order() could collate differently in another locale - confirm with
#vocab_matches after any change to the data or the key
easy_items_scored = score_items(easy_items_sorted, scoring_key$correct)

easy_items_table = easy_items %>% map_df(table2)

#count of correct
d$vocab_sumscore = easy_items_scored %>% rowSums()

#IRT
#one-factor 2PL with the lower asymptote fixed at .20 for every item
#the fit is cached on disk; renew_all forces a refit
#NOTE(review): the cached fit printed below reports that EM did not
#converge within 5000 iterations
vocab_irt = cache_object(
  filename = "data/vocab_irt.rds",
  renew = renew_all,
  expr = mirt(
    easy_items_scored,
    model = 1,
    itemtype = "2PL",
    guess = .20,
    method = "EM",
    optimizer = "BFGS",
    technical = list(NCYCLES = 5000)
  )
)
## Cache found, reading object from disk
#results
vocab_irt
## 
## Call:
## mirt(data = easy_items_scored, model = 1, itemtype = "2PL", guess = 0.2, 
##     method = "EM", optimizer = "BFGS", technical = list(NCYCLES = 5000))
## 
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 5000 EM iterations.
## mirt version: 1.41.8 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -28464
## Estimated parameters: 318 
## AIC = 57565
## BIC = 58904; SABIC = 57895
## G2 (1e+10) = 50753, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
summary(vocab_irt)
##                                       F1    h2
## a_specific_number                  0.569 0.323
## a_type_of_brass_instrument         0.881 0.776
## a_type_of_drapery                  0.764 0.584
## a_type_of_fabric_101               0.780 0.608
## a_type_of_fabric_98                0.786 0.618
## a_type_of_grasshopper              0.873 0.762
## a_type_of_hat                      0.827 0.685
## a_type_of_heating_unit             0.638 0.407
## a_type_of_magistrate_position      0.845 0.714
## a_type_of_mathematical_operation   0.631 0.398
## a_type_of_overshoe                 0.630 0.397
## a_type_of_plant                    0.825 0.681
## a_type_of_religious_teacher        0.847 0.717
## a_type_of_rock                     0.501 0.251
## a_type_of_skirts                   0.748 0.559
## a_type_of_smokeless_powder         0.595 0.354
## a_type_of_sound                    0.622 0.387
## a_type_of_sword                    0.510 0.260
## a_type_of_volcanic_crater          0.675 0.456
## a_type_of_wavy_form                0.849 0.721
## actionable_negligence              0.845 0.715
## advisory                           0.718 0.516
## advocate                           0.733 0.537
## almighty                           0.905 0.818
## amazement                          0.791 0.626
## amenability                        0.844 0.712
## aroma                              0.816 0.666
## auspices                           0.867 0.753
## avoid                              0.807 0.651
## barely_able_to_read_and_write      0.896 0.803
## because                            0.778 0.605
## berate                             0.885 0.783
## blade                              0.764 0.583
## blend                              0.716 0.513
## blunder                            0.882 0.777
## bow                                0.903 0.815
## brief                              0.594 0.353
## bring_about                        0.864 0.746
## carelessly_or_hastily_put_together 0.743 0.552
## celebration                        0.809 0.654
## cheat                              0.891 0.793
## cheerful                           0.923 0.852
## clay_pigeon_shooting               0.822 0.676
## coarse                             0.816 0.665
## collect_or_study_insects           0.756 0.571
## colossal                           0.778 0.605
## commotion                          0.934 0.873
## complainer                         0.898 0.806
## confiscate                         0.897 0.805
## congratulate                       0.587 0.345
## construct                          0.977 0.954
## contemplate                        0.628 0.394
## convoy                             0.927 0.860
## couch                              0.785 0.617
## crease                             0.835 0.697
## cunning                            0.769 0.591
## deceit                             0.651 0.424
## deliberately                       0.827 0.684
## deprive                            0.679 0.461
## detectable                         0.728 0.530
## diatribes                          0.777 0.604
## disjoined                          0.507 0.257
## disrupt                            0.735 0.540
## distinct                           0.386 0.149
## divergence                         0.898 0.807
## dome                               0.677 0.459
## downfall                           0.801 0.641
## drink                              0.936 0.876
## elite                              0.809 0.655
## embarrassment                      0.595 0.355
## emphasize                          0.830 0.690
## empire                             0.640 0.409
## empty                              0.781 0.610
## entanglement                       0.843 0.710
## environment                        0.881 0.776
## evil                               0.627 0.393
## excite                             0.861 0.741
## farewell                           0.948 0.899
## flammable                          0.850 0.723
## flatterer                          0.914 0.835
## flatteries                         0.702 0.493
## forever                            0.863 0.744
## frequent                           0.602 0.362
## gigantic                           0.789 0.622
## girl                               0.831 0.691
## goo                                0.825 0.680
## goodbye                            0.896 0.803
## greed                              0.941 0.885
## groan                              0.861 0.741
## gruesome                           0.750 0.563
## guarantee                          0.503 0.253
## gutter                             0.792 0.627
## harmfulness                        0.593 0.351
## hobby                              0.757 0.573
## hut                                0.851 0.724
## illness                            0.601 0.362
## impromptu                          0.827 0.683
## indescribable                      0.788 0.620
## intellectual                       0.732 0.535
## jargon                             0.945 0.892
## knowledgeable                      0.885 0.783
## lackadaisical                      0.689 0.475
## manager                            0.829 0.688
## meal                               0.943 0.889
## melodic                            0.773 0.598
## mutually                           0.858 0.736
## nonsense                           0.909 0.827
## nonsensical                        0.875 0.766
## not_coveted                        0.759 0.577
## pamper                             0.861 0.741
## penitentiary                       0.711 0.506
## perplexing                         0.721 0.520
## persistence                        0.757 0.573
## predetermine                       0.675 0.456
## pretender                          0.824 0.680
## questioning                        0.759 0.576
## quickly                            0.727 0.529
## rebellious                         0.851 0.724
## referee                            0.802 0.643
## referendum                         0.761 0.579
## relating_to_the_right              0.538 0.289
## relevant                           0.833 0.694
## remove                             0.990 0.981
## respectful                         0.850 0.722
## retailer                           0.652 0.426
## retract                            0.646 0.418
## ropes                              0.837 0.701
## sag                                0.746 0.556
## schemer                            0.836 0.699
## seize                              0.820 0.673
## sensitivity                        0.805 0.648
## shadows                            0.745 0.555
## silly                              0.848 0.719
## sketch                             0.750 0.563
## slang                              0.868 0.753
## slave                              0.471 0.222
## sluggish                           0.781 0.610
## somber                             0.733 0.537
## spinelessness                      0.707 0.499
## sporadic                           0.924 0.853
## squad                              0.825 0.681
## stagger                            0.776 0.602
## stinking                           0.940 0.883
## stroll                             0.936 0.875
## stubborn_100                       0.688 0.473
## stubborn_143                       0.731 0.535
## stylish                            0.936 0.876
## summit                             0.762 0.581
## terminology                        0.920 0.847
## the_science_of_speech_sounds       0.839 0.704
## transportation                     0.484 0.234
## tyrant                             0.888 0.788
## unhealthful                        0.684 0.468
## vile                               0.922 0.851
## vulgar                             0.922 0.850
## wandering                          0.583 0.340
## warning                            0.746 0.557
## wave                               0.804 0.646
## weaponry                           0.715 0.512
## 
## SS loadings:  98.7 
## Proportion Var:  0.62 
## 
## Factor correlations: 
## 
##    F1
## F1  1
vocab_irt %>% coef(simplify = TRUE)
## $items
##                                        a1      d   g u
## a_specific_number                   1.177  1.391 0.2 1
## a_type_of_brass_instrument          3.169  3.420 0.2 1
## a_type_of_drapery                   2.016  1.367 0.2 1
## a_type_of_fabric_101                2.120  2.970 0.2 1
## a_type_of_fabric_98                 2.164  0.886 0.2 1
## a_type_of_grasshopper               3.042  1.783 0.2 1
## a_type_of_hat                       2.507 -2.455 0.2 1
## a_type_of_heating_unit              1.411  1.720 0.2 1
## a_type_of_magistrate_position       2.687  2.521 0.2 1
## a_type_of_mathematical_operation    1.383  2.388 0.2 1
## a_type_of_overshoe                  1.381 -0.031 0.2 1
## a_type_of_plant                     2.487  1.948 0.2 1
## a_type_of_religious_teacher         2.708  1.977 0.2 1
## a_type_of_rock                      0.986  2.678 0.2 1
## a_type_of_skirts                    1.916  1.509 0.2 1
## a_type_of_smokeless_powder          1.260  0.728 0.2 1
## a_type_of_sound                     1.353  0.262 0.2 1
## a_type_of_sword                     1.010 -0.184 0.2 1
## a_type_of_volcanic_crater           1.559  1.722 0.2 1
## a_type_of_wavy_form                 2.735 -0.244 0.2 1
## actionable_negligence               2.695  1.857 0.2 1
## advisory                            1.756  3.504 0.2 1
## advocate                            1.834  0.908 0.2 1
## almighty                            3.610  4.885 0.2 1
## amazement                           2.202  2.542 0.2 1
## amenability                         2.678  1.763 0.2 1
## aroma                               2.403  5.181 0.2 1
## auspices                            2.968 -1.360 0.2 1
## avoid                               2.324  7.229 0.2 1
## barely_able_to_read_and_write       3.437  2.667 0.2 1
## because                             2.106  2.470 0.2 1
## berate                              3.231  2.321 0.2 1
## blade                               2.014  3.093 0.2 1
## blend                               1.746  2.462 0.2 1
## blunder                             3.180  4.087 0.2 1
## bow                                 3.576 -0.561 0.2 1
## brief                               1.258 -0.372 0.2 1
## bring_about                         2.918  2.297 0.2 1
## carelessly_or_hastily_put_together  1.888  1.837 0.2 1
## celebration                         2.339  0.869 0.2 1
## cheat                               3.333  0.789 0.2 1
## cheerful                            4.085  9.422 0.2 1
## clay_pigeon_shooting                2.460  3.204 0.2 1
## coarse                              2.401  3.195 0.2 1
## collect_or_study_insects            1.965  2.783 0.2 1
## colossal                            2.107  1.460 0.2 1
## commotion                           4.460  2.993 0.2 1
## complainer                          3.473  2.087 0.2 1
## confiscate                          3.458  3.275 0.2 1
## congratulate                        1.234  1.900 0.2 1
## construct                           7.775 24.870 0.2 1
## contemplate                         1.373  2.079 0.2 1
## convoy                              4.213  1.464 0.2 1
## couch                               2.159  0.874 0.2 1
## crease                              2.584  1.752 0.2 1
## cunning                             2.048  1.127 0.2 1
## deceit                              1.460  0.757 0.2 1
## deliberately                        2.502  1.571 0.2 1
## deprive                             1.573  1.497 0.2 1
## detectable                          1.808  3.296 0.2 1
## diatribes                           2.103 -0.290 0.2 1
## disjoined                           1.002 -0.813 0.2 1
## disrupt                             1.843  3.631 0.2 1
## distinct                            0.712  1.557 0.2 1
## divergence                          3.481  0.581 0.2 1
## dome                                1.568  0.671 0.2 1
## downfall                            2.275  2.421 0.2 1
## drink                               4.520  1.554 0.2 1
## elite                               2.345  1.823 0.2 1
## embarrassment                       1.261  1.878 0.2 1
## emphasize                           2.538  4.240 0.2 1
## empire                              1.417  1.215 0.2 1
## empty                               2.129  1.847 0.2 1
## entanglement                        2.666 -0.851 0.2 1
## environment                         3.165 -1.455 0.2 1
## evil                                1.370  4.531 0.2 1
## excite                              2.877  0.612 0.2 1
## farewell                            5.084  6.021 0.2 1
## flammable                           2.749  5.679 0.2 1
## flatterer                           3.832  2.349 0.2 1
## flatteries                          1.679  0.716 0.2 1
## forever                             2.905  2.269 0.2 1
## frequent                            1.283  3.586 0.2 1
## gigantic                            2.182  3.422 0.2 1
## girl                                2.544  2.167 0.2 1
## goo                                 2.483  4.032 0.2 1
## goodbye                             3.438  4.060 0.2 1
## greed                               4.724  0.061 0.2 1
## groan                               2.881  3.030 0.2 1
## gruesome                            1.931  1.530 0.2 1
## guarantee                           0.990  2.298 0.2 1
## gutter                              2.206  3.049 0.2 1
## harmfulness                         1.252  1.649 0.2 1
## hobby                               1.970  2.290 0.2 1
## hut                                 2.753  2.546 0.2 1
## illness                             1.281  0.538 0.2 1
## impromptu                           2.501 -0.236 0.2 1
## indescribable                       2.176  3.429 0.2 1
## intellectual                        1.827  2.987 0.2 1
## jargon                              4.895  5.716 0.2 1
## knowledgeable                       3.234  1.859 0.2 1
## lackadaisical                       1.620  0.742 0.2 1
## manager                             2.527  4.861 0.2 1
## meal                                4.805 -0.645 0.2 1
## melodic                             2.076  2.032 0.2 1
## mutually                            2.839  4.328 0.2 1
## nonsense                            3.716  2.797 0.2 1
## nonsensical                         3.078  4.863 0.2 1
## not_coveted                         1.986  1.730 0.2 1
## pamper                              2.882 -1.850 0.2 1
## penitentiary                        1.723  2.433 0.2 1
## perplexing                          1.773  2.327 0.2 1
## persistence                         1.970  1.813 0.2 1
## predetermine                        1.557  2.314 0.2 1
## pretender                           2.478  5.581 0.2 1
## questioning                         1.982  2.258 0.2 1
## quickly                             1.803  1.133 0.2 1
## rebellious                          2.760  1.886 0.2 1
## referee                             2.283  2.429 0.2 1
## referendum                          1.994 -1.869 0.2 1
## relating_to_the_right               1.085  0.332 0.2 1
## relevant                            2.563  0.430 0.2 1
## remove                             12.181 34.206 0.2 1
## respectful                          2.741  2.041 0.2 1
## retailer                            1.465  1.178 0.2 1
## retract                             1.441  1.834 0.2 1
## ropes                               2.605  2.114 0.2 1
## sag                                 1.906  0.479 0.2 1
## schemer                             2.594 -0.497 0.2 1
## seize                               2.439  2.156 0.2 1
## sensitivity                         2.308  0.675 0.2 1
## shadows                             1.899  0.416 0.2 1
## silly                               2.719  7.500 0.2 1
## sketch                              1.931  0.636 0.2 1
## slang                               2.970  1.753 0.2 1
## slave                               0.910  0.995 0.2 1
## sluggish                            2.131  0.706 0.2 1
## somber                              1.833  0.183 0.2 1
## spinelessness                       1.700  1.428 0.2 1
## sporadic                            4.102  0.226 0.2 1
## squad                               2.489  6.207 0.2 1
## stagger                             2.093  0.936 0.2 1
## stinking                            4.678  2.981 0.2 1
## stroll                              4.512  0.835 0.2 1
## stubborn_100                        1.612  1.691 0.2 1
## stubborn_143                        1.824  1.862 0.2 1
## stylish                             4.514 -2.432 0.2 1
## summit                              2.004  1.892 0.2 1
## terminology                         4.003  1.109 0.2 1
## the_science_of_speech_sounds        2.622  3.948 0.2 1
## transportation                      0.942  0.140 0.2 1
## tyrant                              3.282 -3.729 0.2 1
## unhealthful                         1.596 -1.445 0.2 1
## vile                                4.064  4.430 0.2 1
## vulgar                              4.054 -0.654 0.2 1
## wandering                           1.220 -0.417 0.2 1
## warning                             1.908  2.148 0.2 1
## wave                                2.301  1.495 0.2 1
## weaponry                            1.742  1.491 0.2 1
## 
## $means
## F1 
##  0 
## 
## $cov
##    F1
## F1  1
#factor scores together with their standard errors
vocab_irt_scores = fscores(vocab_irt, full.scores.SE = TRUE)
#empirical reliability computed from the scores and their standard errors
empirical_rxx(vocab_irt_scores)
##   F1 
## 0.97
#reliability plot for the fitted model
plot(vocab_irt, type = "rxx")

#standardized score taken from the first column of the score matrix
d$g_easy = standardize(vocab_irt_scores[, 1])
#item statistics, with the first-factor loading of each item attached
vocab_item_stats = itemstats(easy_items_scored)
#note: summary() also prints the whole loading table as a side effect
vocab_item_stats$itemstats$g_loading = summary(vocab_irt)$rotF[, 1]
##                                       F1    h2
## a_specific_number                  0.569 0.323
## a_type_of_brass_instrument         0.881 0.776
## a_type_of_drapery                  0.764 0.584
## a_type_of_fabric_101               0.780 0.608
## a_type_of_fabric_98                0.786 0.618
## a_type_of_grasshopper              0.873 0.762
## a_type_of_hat                      0.827 0.685
## a_type_of_heating_unit             0.638 0.407
## a_type_of_magistrate_position      0.845 0.714
## a_type_of_mathematical_operation   0.631 0.398
## a_type_of_overshoe                 0.630 0.397
## a_type_of_plant                    0.825 0.681
## a_type_of_religious_teacher        0.847 0.717
## a_type_of_rock                     0.501 0.251
## a_type_of_skirts                   0.748 0.559
## a_type_of_smokeless_powder         0.595 0.354
## a_type_of_sound                    0.622 0.387
## a_type_of_sword                    0.510 0.260
## a_type_of_volcanic_crater          0.675 0.456
## a_type_of_wavy_form                0.849 0.721
## actionable_negligence              0.845 0.715
## advisory                           0.718 0.516
## advocate                           0.733 0.537
## almighty                           0.905 0.818
## amazement                          0.791 0.626
## amenability                        0.844 0.712
## aroma                              0.816 0.666
## auspices                           0.867 0.753
## avoid                              0.807 0.651
## barely_able_to_read_and_write      0.896 0.803
## because                            0.778 0.605
## berate                             0.885 0.783
## blade                              0.764 0.583
## blend                              0.716 0.513
## blunder                            0.882 0.777
## bow                                0.903 0.815
## brief                              0.594 0.353
## bring_about                        0.864 0.746
## carelessly_or_hastily_put_together 0.743 0.552
## celebration                        0.809 0.654
## cheat                              0.891 0.793
## cheerful                           0.923 0.852
## clay_pigeon_shooting               0.822 0.676
## coarse                             0.816 0.665
## collect_or_study_insects           0.756 0.571
## colossal                           0.778 0.605
## commotion                          0.934 0.873
## complainer                         0.898 0.806
## confiscate                         0.897 0.805
## congratulate                       0.587 0.345
## construct                          0.977 0.954
## contemplate                        0.628 0.394
## convoy                             0.927 0.860
## couch                              0.785 0.617
## crease                             0.835 0.697
## cunning                            0.769 0.591
## deceit                             0.651 0.424
## deliberately                       0.827 0.684
## deprive                            0.679 0.461
## detectable                         0.728 0.530
## diatribes                          0.777 0.604
## disjoined                          0.507 0.257
## disrupt                            0.735 0.540
## distinct                           0.386 0.149
## divergence                         0.898 0.807
## dome                               0.677 0.459
## downfall                           0.801 0.641
## drink                              0.936 0.876
## elite                              0.809 0.655
## embarrassment                      0.595 0.355
## emphasize                          0.830 0.690
## empire                             0.640 0.409
## empty                              0.781 0.610
## entanglement                       0.843 0.710
## environment                        0.881 0.776
## evil                               0.627 0.393
## excite                             0.861 0.741
## farewell                           0.948 0.899
## flammable                          0.850 0.723
## flatterer                          0.914 0.835
## flatteries                         0.702 0.493
## forever                            0.863 0.744
## frequent                           0.602 0.362
## gigantic                           0.789 0.622
## girl                               0.831 0.691
## goo                                0.825 0.680
## goodbye                            0.896 0.803
## greed                              0.941 0.885
## groan                              0.861 0.741
## gruesome                           0.750 0.563
## guarantee                          0.503 0.253
## gutter                             0.792 0.627
## harmfulness                        0.593 0.351
## hobby                              0.757 0.573
## hut                                0.851 0.724
## illness                            0.601 0.362
## impromptu                          0.827 0.683
## indescribable                      0.788 0.620
## intellectual                       0.732 0.535
## jargon                             0.945 0.892
## knowledgeable                      0.885 0.783
## lackadaisical                      0.689 0.475
## manager                            0.829 0.688
## meal                               0.943 0.889
## melodic                            0.773 0.598
## mutually                           0.858 0.736
## nonsense                           0.909 0.827
## nonsensical                        0.875 0.766
## not_coveted                        0.759 0.577
## pamper                             0.861 0.741
## penitentiary                       0.711 0.506
## perplexing                         0.721 0.520
## persistence                        0.757 0.573
## predetermine                       0.675 0.456
## pretender                          0.824 0.680
## questioning                        0.759 0.576
## quickly                            0.727 0.529
## rebellious                         0.851 0.724
## referee                            0.802 0.643
## referendum                         0.761 0.579
## relating_to_the_right              0.538 0.289
## relevant                           0.833 0.694
## remove                             0.990 0.981
## respectful                         0.850 0.722
## retailer                           0.652 0.426
## retract                            0.646 0.418
## ropes                              0.837 0.701
## sag                                0.746 0.556
## schemer                            0.836 0.699
## seize                              0.820 0.673
## sensitivity                        0.805 0.648
## shadows                            0.745 0.555
## silly                              0.848 0.719
## sketch                             0.750 0.563
## slang                              0.868 0.753
## slave                              0.471 0.222
## sluggish                           0.781 0.610
## somber                             0.733 0.537
## spinelessness                      0.707 0.499
## sporadic                           0.924 0.853
## squad                              0.825 0.681
## stagger                            0.776 0.602
## stinking                           0.940 0.883
## stroll                             0.936 0.875
## stubborn_100                       0.688 0.473
## stubborn_143                       0.731 0.535
## stylish                            0.936 0.876
## summit                             0.762 0.581
## terminology                        0.920 0.847
## the_science_of_speech_sounds       0.839 0.704
## transportation                     0.484 0.234
## tyrant                             0.888 0.788
## unhealthful                        0.684 0.468
## vile                               0.922 0.851
## vulgar                             0.922 0.850
## wandering                          0.583 0.340
## warning                            0.746 0.557
## wave                               0.804 0.646
## weaponry                           0.715 0.512
## 
## SS loadings:  98.7 
## Proportion Var:  0.62 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#distribution of the per-item means (item difficulties)
vocab_item_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

#numeric summary of the same item means
vocab_item_stats$itemstats$mean %>% describe2()
#scatterplot of vocab_sumscore against g_easy
GG_scatter(d, "vocab_sumscore", "g_easy")
## `geom_smooth()` using formula = 'y ~ x'

#distribution of each of the two scores
GG_denhist(d, "vocab_sumscore")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d, "g_easy")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#short aliases for the two self-assessment questions
d[["vocab_sumscore_estimate"]] = d[["Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct"]]
d[["vocab_ranking_estimate"]] = d[["Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did"]]

#correlation matrix of the two scores and the two self-estimates
wtd.cors(select(
  d,
  vocab_sumscore,
  g_easy,
  vocab_sumscore_estimate,
  vocab_ranking_estimate
))
##                         vocab_sumscore g_easy vocab_sumscore_estimate
## vocab_sumscore                   1.000  0.962                   0.612
## g_easy                           0.962  1.000                   0.629
## vocab_sumscore_estimate          0.612  0.629                   1.000
## vocab_ranking_estimate           0.385  0.406                   0.769
##                         vocab_ranking_estimate
## vocab_sumscore                           0.385
## g_easy                                   0.406
## vocab_sumscore_estimate                  0.769
## vocab_ranking_estimate                   1.000
#self-estimated number correct (x) against vocab_sumscore (y)
#uses the original long column name, i.e. the same data as vocab_sumscore_estimate
GG_scatter(d, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'

#age-adjust g_easy: keep the residuals from regressing it on a restricted cubic spline of age
d$g_easy_ageadj = unname(resid(ols(g_easy ~ rcs(age), data = d)))
## number of knots in rcs defaulting to 5
#then standardize the residuals with white_only as the focal group
d$g_easy_ageadj_z = d$g_easy_ageadj %>% standardize(focal_group = d$white_only)

Hard items

#follow-up survey export; keep only rows whose Status is "Complete"
hard_items = filter(read_csv("data/follow up 20231029043500-SurveyExport.csv"), Status == "Complete")
## New names:
## Rows: 475 Columns: 373
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (360): Status, Language, Referer, SessionID, User Agent, IP Address, Co... dbl
## (7): Response ID, Longitude, Latitude, New Hidden Value...206, New Hi... lgl
## (4): Contact ID, Legacy Comments, Comments, Tags dttm (2): Time Started, Date
## Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...23`
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...90`
## • `New Hidden Value` -> `New Hidden Value...206`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong
##   together...279`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong
##   together...283`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that
##   belong together...336`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that
##   belong together...361`
## • `New Hidden Value` -> `New Hidden Value...367`
## • `New Hidden Value` -> `New Hidden Value...372`
## • `New Hidden Value` -> `New Hidden Value...373`
#Prolific export accompanying the follow-up survey (matched to the survey data by `Participant id` further down)
hard_items_meta = read_csv("data/follow up prolific_export_649a005bfc9bd0688f8e3304.csv")
## Rows: 465 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (13): Submission id, Participant id, Status, Completion code, Age, Sex,...
## dbl   (2): Time taken, Total approvals
## dttm  (4): Started at, Completed at, Reviewed at, Archived at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#variable table of the survey export, built before any further row filtering
hard_items_var_list = df_var_table(hard_items)

#keep only participants that appear in both the survey export and the Prolific export
#(the metadata is filtered first, then the survey data against the filtered metadata)
hard_items_meta = hard_items_meta %>% filter(`Participant id` %in% hard_items$`Write in your Prolific ID:`)
hard_items = hard_items %>% filter(`Write in your Prolific ID:` %in% hard_items_meta$`Participant id`)

#no dups
#anyDuplicated() returns 0 when there is no duplicate (otherwise the index of the first one),
#so !anyDuplicated() is TRUE exactly when every ID is unique
assert_that(!anyDuplicated(hard_items$`Write in your Prolific ID:`))
## [1] TRUE
assert_that(!anyDuplicated(hard_items_meta$`Participant id`))
## [1] TRUE
#split the item columns by item format, selected by column position in the export
#positions 206, 367, 372 and 373 are the `New Hidden Value` columns and are left out
hard_items_2of5 = select(hard_items, 21:205)
hard_items_3of5 = select(hard_items, 207:366)
hard_items_1of5 = select(hard_items, 368:371)

#score the 1-of-5 items against their keys
#the first option is always the correct one,
#but the option order is not visible in the csv export, so the keyed words are listed explicitly
hard_items_1of5_scored = as_tibble(score_items(
  hard_items_1of5,
  key = c("whispering", "verbiage", "fragrance", "sagacious")
))

#score one set of "pick k of 5" items
#items: data frame with 5 consecutive option columns per item; a cell is NA when
#  the option was not ticked
#k: number of keyed options; within each item the first k columns are the keyed ones
#returns one 0/1 column per item: 1 when all k keyed options were ticked, else 0
#NOTE(review): ticks on the non-keyed options are not checked; this presumably relies on
#the survey forcing exactly k picks per item -- confirm
score_pick_k_of_5 = function(items, k) {
  #every item must contribute exactly 5 option columns
  assert_that(ncol(items) %% 5 == 0)

  map_df(seq_along(items) %>% split_every_k(k = 5), function(idx) {
    #TRUE where the respondent ticked the option
    picked = !is.na(items[, unlist(idx)])

    #correct only when every one of the first k (keyed) options was ticked
    as.numeric(rowSums(picked[, seq_len(k), drop = FALSE]) == k)
  })
}

#2 keyed options per item: correct when options 1+2 were ticked
hard_items_2of5_scored = score_pick_k_of_5(hard_items_2of5, k = 2)

#3 keyed options per item: correct when options 1+2+3 were ticked
hard_items_3of5_scored = score_pick_k_of_5(hard_items_3of5, k = 3)

#put the three item formats side by side, naming columns <format>_<running number>
hard_items_scored = bind_cols(
  set_names(hard_items_1of5_scored, paste0("1of5_", seq_along(hard_items_1of5_scored))),
  set_names(hard_items_2of5_scored, paste0("2of5_", seq_along(hard_items_2of5_scored))),
  set_names(hard_items_3of5_scored, paste0("3of5_", seq_along(hard_items_3of5_scored)))
)

#classical item statistics for the scored hard items
hard_items_scored_stats = itemstats(hard_items_scored)

#IRT fit
#unidimensional (model = 1) 2PL on the scored hard items, allowing up to 5000 EM cycles
#the result goes through cache_object(); `renew = renew_all` presumably forces a refit -- confirm
#(no trailing comma after the last mirt() argument: it would pass an empty argument)
hard_items_fit = cache_object(filename = "data/hard_items_fit.rds", expr = mirt(
  hard_items_scored,
  model = 1,
  itemtype = "2PL",
  technical = list(NCYCLES = 5000)
),
renew = renew_all)
## Cache found, reading object from disk
#print the fit overview (convergence, log-likelihood, information criteria)
hard_items_fit
## 
## Call:
## mirt(data = hard_items_scored, model = 1, itemtype = "2PL", technical = list(NCYCLES = 5000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 55 EM iterations.
## mirt version: 1.41.8 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -16226
## Estimated parameters: 146 
## AIC = 32744
## BIC = 33339; SABIC = 32875
## G2 (1e+10) = 27181, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
#loadings on F1 and h2 for every hard item
summary(hard_items_fit)
##             F1      h2
## 1of5_1   0.400 0.16006
## 1of5_2   0.272 0.07408
## 1of5_3   0.293 0.08584
## 1of5_4   0.308 0.09464
## 2of5_1   0.521 0.27105
## 2of5_2   0.582 0.33837
## 2of5_3   0.620 0.38423
## 2of5_4   0.640 0.40957
## 2of5_5   0.546 0.29805
## 2of5_6   0.771 0.59518
## 2of5_7   0.383 0.14651
## 2of5_8   0.629 0.39582
## 2of5_9   0.617 0.38059
## 2of5_10  0.351 0.12339
## 2of5_11  0.397 0.15783
## 2of5_12  0.462 0.21379
## 2of5_13  0.509 0.25893
## 2of5_14  0.350 0.12239
## 2of5_15  0.715 0.51183
## 2of5_16  0.586 0.34328
## 2of5_17  0.731 0.53427
## 2of5_18  0.761 0.57919
## 2of5_19  0.533 0.28458
## 2of5_20  0.385 0.14789
## 2of5_21  0.561 0.31423
## 2of5_22  0.901 0.81262
## 2of5_23  0.372 0.13853
## 2of5_24  0.285 0.08135
## 2of5_25  0.743 0.55173
## 2of5_26  0.743 0.55217
## 2of5_27  0.617 0.38078
## 2of5_28  0.348 0.12118
## 2of5_29  0.607 0.36849
## 2of5_30  0.637 0.40591
## 2of5_31  0.593 0.35144
## 2of5_32  0.583 0.33980
## 2of5_33  0.365 0.13304
## 2of5_34  0.491 0.24127
## 2of5_35  0.346 0.12001
## 2of5_36  0.879 0.77203
## 2of5_37  0.992 0.98379
## 3of5_1   0.247 0.06079
## 3of5_2   0.513 0.26333
## 3of5_3   0.529 0.28004
## 3of5_4   0.736 0.54120
## 3of5_5   0.391 0.15255
## 3of5_6   0.739 0.54626
## 3of5_7   0.607 0.36903
## 3of5_8   0.100 0.01008
## 3of5_9   0.253 0.06387
## 3of5_10  0.799 0.63823
## 3of5_11  0.725 0.52514
## 3of5_12  0.587 0.34506
## 3of5_13  0.318 0.10106
## 3of5_14  0.524 0.27491
## 3of5_15 -0.038 0.00145
## 3of5_16  0.407 0.16550
## 3of5_17  0.494 0.24440
## 3of5_18  0.380 0.14436
## 3of5_19 -0.336 0.11263
## 3of5_20  0.569 0.32320
## 3of5_21  0.420 0.17622
## 3of5_22  0.498 0.24759
## 3of5_23  0.811 0.65806
## 3of5_24  0.609 0.37113
## 3of5_25  0.571 0.32581
## 3of5_26  0.728 0.52989
## 3of5_27  0.589 0.34742
## 3of5_28  0.409 0.16703
## 3of5_29  0.571 0.32574
## 3of5_30  0.590 0.34832
## 3of5_31  0.457 0.20895
## 3of5_32  0.637 0.40528
## 
## SS loadings:  22.9 
## Proportion Var:  0.313 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#store each item's F1 loading as g_loading
#verbose = FALSE: summary() is only needed for its return value here, so do not re-print the loading table
hard_items_scored_stats$itemstats$g_loading = hard_items_fit %>% summary(verbose = FALSE) %>% .$rotF %>% as.vector()
##             F1      h2
## 1of5_1   0.400 0.16006
## 1of5_2   0.272 0.07408
## 1of5_3   0.293 0.08584
## 1of5_4   0.308 0.09464
## 2of5_1   0.521 0.27105
## 2of5_2   0.582 0.33837
## 2of5_3   0.620 0.38423
## 2of5_4   0.640 0.40957
## 2of5_5   0.546 0.29805
## 2of5_6   0.771 0.59518
## 2of5_7   0.383 0.14651
## 2of5_8   0.629 0.39582
## 2of5_9   0.617 0.38059
## 2of5_10  0.351 0.12339
## 2of5_11  0.397 0.15783
## 2of5_12  0.462 0.21379
## 2of5_13  0.509 0.25893
## 2of5_14  0.350 0.12239
## 2of5_15  0.715 0.51183
## 2of5_16  0.586 0.34328
## 2of5_17  0.731 0.53427
## 2of5_18  0.761 0.57919
## 2of5_19  0.533 0.28458
## 2of5_20  0.385 0.14789
## 2of5_21  0.561 0.31423
## 2of5_22  0.901 0.81262
## 2of5_23  0.372 0.13853
## 2of5_24  0.285 0.08135
## 2of5_25  0.743 0.55173
## 2of5_26  0.743 0.55217
## 2of5_27  0.617 0.38078
## 2of5_28  0.348 0.12118
## 2of5_29  0.607 0.36849
## 2of5_30  0.637 0.40591
## 2of5_31  0.593 0.35144
## 2of5_32  0.583 0.33980
## 2of5_33  0.365 0.13304
## 2of5_34  0.491 0.24127
## 2of5_35  0.346 0.12001
## 2of5_36  0.879 0.77203
## 2of5_37  0.992 0.98379
## 3of5_1   0.247 0.06079
## 3of5_2   0.513 0.26333
## 3of5_3   0.529 0.28004
## 3of5_4   0.736 0.54120
## 3of5_5   0.391 0.15255
## 3of5_6   0.739 0.54626
## 3of5_7   0.607 0.36903
## 3of5_8   0.100 0.01008
## 3of5_9   0.253 0.06387
## 3of5_10  0.799 0.63823
## 3of5_11  0.725 0.52514
## 3of5_12  0.587 0.34506
## 3of5_13  0.318 0.10106
## 3of5_14  0.524 0.27491
## 3of5_15 -0.038 0.00145
## 3of5_16  0.407 0.16550
## 3of5_17  0.494 0.24440
## 3of5_18  0.380 0.14436
## 3of5_19 -0.336 0.11263
## 3of5_20  0.569 0.32320
## 3of5_21  0.420 0.17622
## 3of5_22  0.498 0.24759
## 3of5_23  0.811 0.65806
## 3of5_24  0.609 0.37113
## 3of5_25  0.571 0.32581
## 3of5_26  0.728 0.52989
## 3of5_27  0.589 0.34742
## 3of5_28  0.409 0.16703
## 3of5_29  0.571 0.32574
## 3of5_30  0.590 0.34832
## 3of5_31  0.457 0.20895
## 3of5_32  0.637 0.40528
## 
## SS loadings:  22.9 
## Proportion Var:  0.313 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#per-item statistics table (now including g_loading) and its numeric summary
hard_items_scored_stats$itemstats
hard_items_scored_stats$itemstats %>% describe2()
#difficulties: distribution of the item means
hard_items_scored_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

#loadings: distribution of the F1 loadings stored in g_loading
hard_items_scored_stats$itemstats$g_loading %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#reliability
#reliability plot ("rxx") of the fitted model
plot(hard_items_fit, type = "rxx")

#empirical reliability, computed from the factor scores together with their standard errors
#(TRUE spelled out: T is an ordinary variable that can be reassigned)
hard_items_fit_scores = fscores(hard_items_fit, full.scores.SE = TRUE)
empirical_rxx(hard_items_fit_scores)
##    F1 
## 0.942
#time spent vs. score
# hard_items_scored$time_spent_mins = (hard_items$`Date Submitted`-hard_items$`Time Started`)/60

#exact choices table
#for every 5-column item, collapse the options a respondent ticked into one string
#items: data frame with 5 consecutive option columns per item; NA = option not ticked
#returns one character column per item holding the non-NA cell values joined by ", "
collapse_item_choices = function(items) {
  map_df(seq_along(items) %>% split_every_k(k = 5), function(idx) {
    #subset cols
    i_cols = items[, unlist(idx)]

    #string collapse across columns, dropping the options that were not ticked
    apply(i_cols, 1, function(x) {
      str_c(na.omit(x), collapse = ", ")
    })
  })
}

#column names follow the scored items: one column per scored item, same order
hard_items_2of5_choices = collapse_item_choices(hard_items_2of5) %>%
  set_colnames("pick2of5_" + seq_len(ncol(hard_items_2of5_scored)))

hard_items_3of5_choices = collapse_item_choices(hard_items_3of5) %>%
  set_colnames("pick3of5_" + seq_len(ncol(hard_items_3of5_scored)))

Full test

Easy and hard items together

#merge data
#attach the participant id to each scored set, then add the hard items to the easy items by id
#(left join: every row of the easy items is kept; hard-item columns are NA where no id matches)
all_items_scored = easy_items_scored %>%
  mutate(id = d$Participant_id) %>%
  left_join(
    hard_items_scored %>% mutate(id = hard_items$`Write in your Prolific ID:`),
    by = "id"
  )

#no dups
#the join must not have multiplied rows: anyDuplicated() is 0 when every id is unique
assert_that(!anyDuplicated(all_items_scored$id))
## [1] TRUE
#fit the combined data set (easy + hard items) with the same unidimensional 2PL model
#convergence is slow at the default settings: the EM did not converge even after 20k iterations,
#and the fit printed below reports non-convergence at NCYCLES = 2000
all_items_fit = cache_object(filename = "data/all_items_fit.rds", expr = mirt(
  all_items_scored %>% select(-id),
  model = 1,
  itemtype = "2PL",
  technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
#classical item statistics for all items (id column dropped first)
all_items_stats = all_items_scored %>% select(-id) %>% itemstats()

#print the fit overview; check the convergence line before using the estimates
all_items_fit
## 
## Call:
## mirt(data = all_items_scored %>% select(-id), model = 1, itemtype = "2PL", 
##     technical = list(NCYCLES = 2000))
## 
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 2000 EM iterations.
## mirt version: 1.41.8 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -44440
## Estimated parameters: 464 
## AIC = 89809
## BIC = 91764; SABIC = 90291
#loadings on F1 and h2 for every item of the combined test
summary(all_items_fit)
##                                         F1      h2
## a_specific_number                   0.5402 0.29178
## a_type_of_brass_instrument          0.8740 0.76391
## a_type_of_drapery                   0.7115 0.50621
## a_type_of_fabric_101                0.7985 0.63765
## a_type_of_fabric_98                 0.6737 0.45382
## a_type_of_grasshopper               0.7928 0.62846
## a_type_of_hat                       0.5003 0.25035
## a_type_of_heating_unit              0.6137 0.37662
## a_type_of_magistrate_position       0.8120 0.65930
## a_type_of_mathematical_operation    0.6522 0.42535
## a_type_of_overshoe                  0.4724 0.22316
## a_type_of_plant                     0.7854 0.61691
## a_type_of_religious_teacher         0.7917 0.62682
## a_type_of_rock                      0.5047 0.25471
## a_type_of_skirts                    0.6916 0.47825
## a_type_of_smokeless_powder          0.5355 0.28671
## a_type_of_sound                     0.5012 0.25118
## a_type_of_sword                     0.3829 0.14664
## a_type_of_volcanic_crater           0.6813 0.46415
## a_type_of_wavy_form                 0.7212 0.52011
## actionable_negligence               0.8212 0.67437
## advisory                            0.8008 0.64122
## advocate                            0.6281 0.39447
## almighty                            0.9206 0.84750
## amazement                           0.8094 0.65521
## amenability                         0.8035 0.64558
## aroma                               0.9202 0.84675
## auspices                            0.6018 0.36214
## avoid                               0.9104 0.82890
## barely_able_to_read_and_write       0.8564 0.73340
## because                             0.7820 0.61145
## berate                              0.7881 0.62109
## blade                               0.7893 0.62293
## blend                               0.7344 0.53932
## blunder                             0.8867 0.78616
## bow                                 0.7573 0.57349
## brief                               0.4498 0.20231
## bring_about                         0.8233 0.67781
## carelessly_or_hastily_put_together  0.7152 0.51158
## celebration                         0.7254 0.52621
## cheat                               0.7953 0.63250
## cheerful                            0.9771 0.95480
## clay_pigeon_shooting                0.8116 0.65874
## coarse                              0.8335 0.69470
## collect_or_study_insects            0.7704 0.59350
## colossal                            0.7277 0.52952
## commotion                           0.8922 0.79605
## complainer                          0.8110 0.65775
## confiscate                          0.8433 0.71114
## congratulate                        0.5825 0.33929
## construct                           0.9947 0.98949
## contemplate                         0.6225 0.38754
## convoy                              0.8539 0.72909
## couch                               0.6832 0.46681
## crease                              0.7893 0.62293
## cunning                             0.6950 0.48299
## deceit                              0.5559 0.30903
## deliberately                        0.7557 0.57101
## deprive                             0.6533 0.42684
## detectable                          0.7698 0.59256
## diatribes                           0.6130 0.37573
## disjoined                           0.3336 0.11127
## disrupt                             0.8202 0.67280
## distinct                            0.3515 0.12352
## divergence                          0.7757 0.60164
## dome                                0.5769 0.33286
## downfall                            0.8049 0.64778
## drink                               0.8212 0.67442
## elite                               0.7477 0.55905
## embarrassment                       0.6113 0.37369
## emphasize                           0.8872 0.78708
## empire                              0.5812 0.33784
## empty                               0.7691 0.59158
## entanglement                        0.7057 0.49800
## environment                         0.5656 0.31987
## evil                                0.7274 0.52906
## excite                              0.7943 0.63090
## farewell                            0.9395 0.88260
## flammable                           0.9257 0.85696
## flatterer                           0.8856 0.78428
## flatteries                          0.5853 0.34253
## forever                             0.8159 0.66569
## frequent                            0.6202 0.38469
## gigantic                            0.8540 0.72929
## girl                                0.7739 0.59899
## goo                                 0.8689 0.75490
## goodbye                             0.8958 0.80254
## greed                               0.8170 0.66756
## groan                               0.8422 0.70933
## gruesome                            0.7053 0.49747
## guarantee                           0.5134 0.26359
## gutter                              0.8215 0.67486
## harmfulness                         0.5994 0.35932
## hobby                               0.7710 0.59445
## hut                                 0.8505 0.72337
## illness                             0.5155 0.26575
## impromptu                           0.6212 0.38583
## indescribable                       0.8333 0.69436
## intellectual                        0.7575 0.57379
## jargon                              0.9500 0.90248
## knowledgeable                       0.8314 0.69129
## lackadaisical                       0.5579 0.31121
## manager                             0.9059 0.82058
## meal                                0.7375 0.54386
## melodic                             0.7332 0.53752
## mutually                            0.8811 0.77631
## nonsense                            0.8746 0.76497
## nonsensical                         0.9169 0.84071
## not_coveted                         0.7009 0.49121
## pamper                              0.5639 0.31793
## penitentiary                        0.7132 0.50866
## perplexing                          0.7308 0.53408
## persistence                         0.7378 0.54431
## predetermine                        0.6998 0.48978
## pretender                           0.9175 0.84176
## questioning                         0.7481 0.55965
## quickly                             0.6500 0.42249
## rebellious                          0.8064 0.65025
## referee                             0.7960 0.63365
## referendum                          0.4748 0.22543
## relating_to_the_right               0.4511 0.20346
## relevant                            0.7349 0.54010
## remove                              0.9966 0.99326
## respectful                          0.7963 0.63411
## retailer                            0.6267 0.39270
## retract                             0.6419 0.41201
## ropes                               0.7853 0.61673
## sag                                 0.6624 0.43880
## schemer                             0.6338 0.40171
## seize                               0.7799 0.60830
## sensitivity                         0.7009 0.49122
## shadows                             0.6090 0.37084
## silly                               0.9573 0.91638
## sketch                              0.6401 0.40976
## slang                               0.8112 0.65800
## slave                               0.4300 0.18489
## sluggish                            0.6997 0.48957
## somber                              0.5596 0.31316
## spinelessness                       0.6696 0.44839
## sporadic                            0.8349 0.69700
## squad                               0.9409 0.88536
## stagger                             0.7265 0.52779
## stinking                            0.8940 0.79922
## stroll                              0.8496 0.72189
## stubborn_100                        0.6696 0.44831
## stubborn_143                        0.7074 0.50042
## stylish                             0.6282 0.39464
## summit                              0.7413 0.54959
## terminology                         0.8137 0.66218
## the_science_of_speech_sounds        0.8670 0.75171
## transportation                      0.3848 0.14807
## tyrant                              0.4110 0.16891
## unhealthful                         0.4015 0.16124
## vile                                0.8990 0.80818
## vulgar                              0.7444 0.55412
## wandering                           0.4161 0.17310
## warning                             0.7491 0.56109
## wave                                0.7362 0.54193
## weaponry                            0.6727 0.45252
## 1of5_1                              0.3457 0.11952
## 1of5_2                              0.2166 0.04691
## 1of5_3                              0.2691 0.07241
## 1of5_4                              0.2739 0.07503
## 2of5_1                              0.4395 0.19317
## 2of5_2                              0.5234 0.27396
## 2of5_3                              0.5978 0.35733
## 2of5_4                              0.5995 0.35935
## 2of5_5                              0.5419 0.29369
## 2of5_6                              0.7464 0.55704
## 2of5_7                              0.3704 0.13720
## 2of5_8                              0.5831 0.34001
## 2of5_9                              0.5813 0.33793
## 2of5_10                             0.3447 0.11882
## 2of5_11                             0.3982 0.15858
## 2of5_12                             0.4071 0.16569
## 2of5_13                             0.4935 0.24352
## 2of5_14                             0.3212 0.10317
## 2of5_15                             0.6787 0.46059
## 2of5_16                             0.5334 0.28453
## 2of5_17                             0.6974 0.48635
## 2of5_18                             0.7224 0.52183
## 2of5_19                             0.4725 0.22321
## 2of5_20                             0.3381 0.11429
## 2of5_21                             0.5263 0.27701
## 2of5_22                             0.8467 0.71692
## 2of5_23                             0.3233 0.10450
## 2of5_24                             0.2244 0.05037
## 2of5_25                             0.7314 0.53496
## 2of5_26                             0.7083 0.50163
## 2of5_27                             0.5498 0.30230
## 2of5_28                             0.3321 0.11026
## 2of5_29                             0.5641 0.31823
## 2of5_30                             0.5882 0.34600
## 2of5_31                             0.5485 0.30083
## 2of5_32                             0.5008 0.25076
## 2of5_33                             0.3161 0.09993
## 2of5_34                             0.4379 0.19176
## 2of5_35                             0.3107 0.09655
## 2of5_36                             0.8086 0.65389
## 2of5_37                             0.8983 0.80698
## 3of5_1                              0.2613 0.06825
## 3of5_2                              0.4582 0.20990
## 3of5_3                              0.4801 0.23052
## 3of5_4                              0.6882 0.47361
## 3of5_5                              0.3766 0.14179
## 3of5_6                              0.7321 0.53603
## 3of5_7                              0.5657 0.32005
## 3of5_8                              0.0823 0.00677
## 3of5_9                              0.2377 0.05652
## 3of5_10                             0.8191 0.67092
## 3of5_11                             0.6980 0.48725
## 3of5_12                             0.5524 0.30516
## 3of5_13                             0.3193 0.10198
## 3of5_14                             0.5119 0.26200
## 3of5_15                            -0.0141 0.00020
## 3of5_16                             0.3939 0.15517
## 3of5_17                             0.4288 0.18389
## 3of5_18                             0.3632 0.13191
## 3of5_19                            -0.2751 0.07570
## 3of5_20                             0.5381 0.28951
## 3of5_21                             0.4473 0.20005
## 3of5_22                             0.4683 0.21933
## 3of5_23                             0.8638 0.74621
## 3of5_24                             0.5869 0.34448
## 3of5_25                             0.5332 0.28427
## 3of5_26                             0.6875 0.47259
## 3of5_27                             0.5614 0.31522
## 3of5_28                             0.4422 0.19556
## 3of5_29                             0.5839 0.34089
## 3of5_30                             0.5036 0.25362
## 3of5_31                             0.4652 0.21639
## 3of5_32                             0.6161 0.37953
## 
## SS loadings:  108 
## Proportion Var:  0.463 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#store each item's F1 loading as g_loading
#verbose = FALSE: summary() is only needed for its return value here, so do not re-print the loading table
all_items_stats$itemstats$g_loading = all_items_fit %>% summary(verbose = FALSE) %>% .$rotF %>% as.vector()
##                                         F1      h2
## a_specific_number                   0.5402 0.29178
## a_type_of_brass_instrument          0.8740 0.76391
## a_type_of_drapery                   0.7115 0.50621
## a_type_of_fabric_101                0.7985 0.63765
## a_type_of_fabric_98                 0.6737 0.45382
## a_type_of_grasshopper               0.7928 0.62846
## a_type_of_hat                       0.5003 0.25035
## a_type_of_heating_unit              0.6137 0.37662
## a_type_of_magistrate_position       0.8120 0.65930
## a_type_of_mathematical_operation    0.6522 0.42535
## a_type_of_overshoe                  0.4724 0.22316
## a_type_of_plant                     0.7854 0.61691
## a_type_of_religious_teacher         0.7917 0.62682
## a_type_of_rock                      0.5047 0.25471
## a_type_of_skirts                    0.6916 0.47825
## a_type_of_smokeless_powder          0.5355 0.28671
## a_type_of_sound                     0.5012 0.25118
## a_type_of_sword                     0.3829 0.14664
## a_type_of_volcanic_crater           0.6813 0.46415
## a_type_of_wavy_form                 0.7212 0.52011
## actionable_negligence               0.8212 0.67437
## advisory                            0.8008 0.64122
## advocate                            0.6281 0.39447
## almighty                            0.9206 0.84750
## amazement                           0.8094 0.65521
## amenability                         0.8035 0.64558
## aroma                               0.9202 0.84675
## auspices                            0.6018 0.36214
## avoid                               0.9104 0.82890
## barely_able_to_read_and_write       0.8564 0.73340
## because                             0.7820 0.61145
## berate                              0.7881 0.62109
## blade                               0.7893 0.62293
## blend                               0.7344 0.53932
## blunder                             0.8867 0.78616
## bow                                 0.7573 0.57349
## brief                               0.4498 0.20231
## bring_about                         0.8233 0.67781
## carelessly_or_hastily_put_together  0.7152 0.51158
## celebration                         0.7254 0.52621
## cheat                               0.7953 0.63250
## cheerful                            0.9771 0.95480
## clay_pigeon_shooting                0.8116 0.65874
## coarse                              0.8335 0.69470
## collect_or_study_insects            0.7704 0.59350
## colossal                            0.7277 0.52952
## commotion                           0.8922 0.79605
## complainer                          0.8110 0.65775
## confiscate                          0.8433 0.71114
## congratulate                        0.5825 0.33929
## construct                           0.9947 0.98949
## contemplate                         0.6225 0.38754
## convoy                              0.8539 0.72909
## couch                               0.6832 0.46681
## crease                              0.7893 0.62293
## cunning                             0.6950 0.48299
## deceit                              0.5559 0.30903
## deliberately                        0.7557 0.57101
## deprive                             0.6533 0.42684
## detectable                          0.7698 0.59256
## diatribes                           0.6130 0.37573
## disjoined                           0.3336 0.11127
## disrupt                             0.8202 0.67280
## distinct                            0.3515 0.12352
## divergence                          0.7757 0.60164
## dome                                0.5769 0.33286
## downfall                            0.8049 0.64778
## drink                               0.8212 0.67442
## elite                               0.7477 0.55905
## embarrassment                       0.6113 0.37369
## emphasize                           0.8872 0.78708
## empire                              0.5812 0.33784
## empty                               0.7691 0.59158
## entanglement                        0.7057 0.49800
## environment                         0.5656 0.31987
## evil                                0.7274 0.52906
## excite                              0.7943 0.63090
## farewell                            0.9395 0.88260
## flammable                           0.9257 0.85696
## flatterer                           0.8856 0.78428
## flatteries                          0.5853 0.34253
## forever                             0.8159 0.66569
## frequent                            0.6202 0.38469
## gigantic                            0.8540 0.72929
## girl                                0.7739 0.59899
## goo                                 0.8689 0.75490
## goodbye                             0.8958 0.80254
## greed                               0.8170 0.66756
## groan                               0.8422 0.70933
## gruesome                            0.7053 0.49747
## guarantee                           0.5134 0.26359
## gutter                              0.8215 0.67486
## harmfulness                         0.5994 0.35932
## hobby                               0.7710 0.59445
## hut                                 0.8505 0.72337
## illness                             0.5155 0.26575
## impromptu                           0.6212 0.38583
## indescribable                       0.8333 0.69436
## intellectual                        0.7575 0.57379
## jargon                              0.9500 0.90248
## knowledgeable                       0.8314 0.69129
## lackadaisical                       0.5579 0.31121
## manager                             0.9059 0.82058
## meal                                0.7375 0.54386
## melodic                             0.7332 0.53752
## mutually                            0.8811 0.77631
## nonsense                            0.8746 0.76497
## nonsensical                         0.9169 0.84071
## not_coveted                         0.7009 0.49121
## pamper                              0.5639 0.31793
## penitentiary                        0.7132 0.50866
## perplexing                          0.7308 0.53408
## persistence                         0.7378 0.54431
## predetermine                        0.6998 0.48978
## pretender                           0.9175 0.84176
## questioning                         0.7481 0.55965
## quickly                             0.6500 0.42249
## rebellious                          0.8064 0.65025
## referee                             0.7960 0.63365
## referendum                          0.4748 0.22543
## relating_to_the_right               0.4511 0.20346
## relevant                            0.7349 0.54010
## remove                              0.9966 0.99326
## respectful                          0.7963 0.63411
## retailer                            0.6267 0.39270
## retract                             0.6419 0.41201
## ropes                               0.7853 0.61673
## sag                                 0.6624 0.43880
## schemer                             0.6338 0.40171
## seize                               0.7799 0.60830
## sensitivity                         0.7009 0.49122
## shadows                             0.6090 0.37084
## silly                               0.9573 0.91638
## sketch                              0.6401 0.40976
## slang                               0.8112 0.65800
## slave                               0.4300 0.18489
## sluggish                            0.6997 0.48957
## somber                              0.5596 0.31316
## spinelessness                       0.6696 0.44839
## sporadic                            0.8349 0.69700
## squad                               0.9409 0.88536
## stagger                             0.7265 0.52779
## stinking                            0.8940 0.79922
## stroll                              0.8496 0.72189
## stubborn_100                        0.6696 0.44831
## stubborn_143                        0.7074 0.50042
## stylish                             0.6282 0.39464
## summit                              0.7413 0.54959
## terminology                         0.8137 0.66218
## the_science_of_speech_sounds        0.8670 0.75171
## transportation                      0.3848 0.14807
## tyrant                              0.4110 0.16891
## unhealthful                         0.4015 0.16124
## vile                                0.8990 0.80818
## vulgar                              0.7444 0.55412
## wandering                           0.4161 0.17310
## warning                             0.7491 0.56109
## wave                                0.7362 0.54193
## weaponry                            0.6727 0.45252
## 1of5_1                              0.3457 0.11952
## 1of5_2                              0.2166 0.04691
## 1of5_3                              0.2691 0.07241
## 1of5_4                              0.2739 0.07503
## 2of5_1                              0.4395 0.19317
## 2of5_2                              0.5234 0.27396
## 2of5_3                              0.5978 0.35733
## 2of5_4                              0.5995 0.35935
## 2of5_5                              0.5419 0.29369
## 2of5_6                              0.7464 0.55704
## 2of5_7                              0.3704 0.13720
## 2of5_8                              0.5831 0.34001
## 2of5_9                              0.5813 0.33793
## 2of5_10                             0.3447 0.11882
## 2of5_11                             0.3982 0.15858
## 2of5_12                             0.4071 0.16569
## 2of5_13                             0.4935 0.24352
## 2of5_14                             0.3212 0.10317
## 2of5_15                             0.6787 0.46059
## 2of5_16                             0.5334 0.28453
## 2of5_17                             0.6974 0.48635
## 2of5_18                             0.7224 0.52183
## 2of5_19                             0.4725 0.22321
## 2of5_20                             0.3381 0.11429
## 2of5_21                             0.5263 0.27701
## 2of5_22                             0.8467 0.71692
## 2of5_23                             0.3233 0.10450
## 2of5_24                             0.2244 0.05037
## 2of5_25                             0.7314 0.53496
## 2of5_26                             0.7083 0.50163
## 2of5_27                             0.5498 0.30230
## 2of5_28                             0.3321 0.11026
## 2of5_29                             0.5641 0.31823
## 2of5_30                             0.5882 0.34600
## 2of5_31                             0.5485 0.30083
## 2of5_32                             0.5008 0.25076
## 2of5_33                             0.3161 0.09993
## 2of5_34                             0.4379 0.19176
## 2of5_35                             0.3107 0.09655
## 2of5_36                             0.8086 0.65389
## 2of5_37                             0.8983 0.80698
## 3of5_1                              0.2613 0.06825
## 3of5_2                              0.4582 0.20990
## 3of5_3                              0.4801 0.23052
## 3of5_4                              0.6882 0.47361
## 3of5_5                              0.3766 0.14179
## 3of5_6                              0.7321 0.53603
## 3of5_7                              0.5657 0.32005
## 3of5_8                              0.0823 0.00677
## 3of5_9                              0.2377 0.05652
## 3of5_10                             0.8191 0.67092
## 3of5_11                             0.6980 0.48725
## 3of5_12                             0.5524 0.30516
## 3of5_13                             0.3193 0.10198
## 3of5_14                             0.5119 0.26200
## 3of5_15                            -0.0141 0.00020
## 3of5_16                             0.3939 0.15517
## 3of5_17                             0.4288 0.18389
## 3of5_18                             0.3632 0.13191
## 3of5_19                            -0.2751 0.07570
## 3of5_20                             0.5381 0.28951
## 3of5_21                             0.4473 0.20005
## 3of5_22                             0.4683 0.21933
## 3of5_23                             0.8638 0.74621
## 3of5_24                             0.5869 0.34448
## 3of5_25                             0.5332 0.28427
## 3of5_26                             0.6875 0.47259
## 3of5_27                             0.5614 0.31522
## 3of5_28                             0.4422 0.19556
## 3of5_29                             0.5839 0.34089
## 3of5_30                             0.5036 0.25362
## 3of5_31                             0.4652 0.21639
## 3of5_32                             0.6161 0.37953
## 
## SS loadings:  108 
## Proportion Var:  0.463 
## 
## Factor correlations: 
## 
##    F1
## F1  1
# Attach the fitted IRT parameters to the per-item statistics table.
# Columns are taken by position from the simplified coefficient matrix
# (presumably slope a1 in column 1 and intercept d in column 2 -- confirm).
all_items_stats$itemstats$discrim = unname(coef(all_items_fit, simplify = TRUE)$items[, 1])
# "difficulty" is the sign-flipped second column.
# NOTE(review): if column 2 is mirt's intercept d, this is -d rather than the
# classical b = -d / a -- confirm this is the intended scale.
all_items_stats$itemstats$difficulty = unname(-coef(all_items_fit, simplify = TRUE)$items[, 2])

# Full item table, followed by descriptives over all items
all_items_stats$itemstats
describe2(all_items_stats$itemstats)

# mean rates by wave: items with N == 499 versus items with fewer responses
describe2(filter(all_items_stats$itemstats, N == 499))
describe2(filter(all_items_stats$itemstats, N < 499))
# difficulties
# Distribution of the per-item `mean` column, with the x axis relabelled
# "Pass rate". The scale_x_continuous() call replaces the x scale that the
# helper already set, which is what triggers the "Scale for x is already
# present" message below.
all_items_stats$itemstats$mean %>% GG_denhist() +
  scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

# Write the figure to disk (presumably the most recently drawn plot -- confirm)
GG_save("figs/all items pass rate.png")

# loadings
# Distribution of the per-item `g_loading` column, with the x axis relabelled
# "Factor loading" (again replacing the x scale set by the helper).
all_items_stats$itemstats$g_loading %>% GG_denhist() +
    scale_x_continuous("Factor loading")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Write the figure to disk (presumably the most recently drawn plot -- confirm)
GG_save("figs/all items factor loading.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# reliability
# First call draws the reliability curve; the second stores the plot object so
# the plotted x/y values can be extracted from it further down.
plot(all_items_fit, type = "rxx")

rxx_info = plot(all_items_fit, type = "rxx")
# Factor scores together with their standard errors (needed by
# empirical_rxx()). Spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
all_items_fit_scores = fscores(all_items_fit, full.scores.SE = TRUE)
empirical_rxx(all_items_fit_scores)
##    F1 
## 0.977
# Which ability range has reliability above .90 (and above .80)?
# The x/y values are read from the first panel of the stored reliability plot.
d_rxx_info = tibble(
  z = rxx_info$panel.args[[1]][["x"]],
  rel = rxx_info$panel.args[[1]][["y"]]
)

describe2(filter(d_rxx_info, rel > .90))
describe2(filter(d_rxx_info, rel > .80))

# Reliability as a function of ability, with the visible x range set to -4..4
ggplot(d_rxx_info, aes(x = z, y = rel)) +
  geom_line() +
  scale_y_continuous(name = "Reliability", breaks = seq(0, 1, by = .05)) +
  scale_x_continuous(name = "Ability level (z)") +
  coord_cartesian(xlim = c(-4, 4))

GG_save("figs/reliability as function of ability.png")

# Descriptives restricted to |z| <= 2 and |z| <= 3
describe2(filter(d_rxx_info, abs(z) <= 2))
describe2(filter(d_rxx_info, abs(z) <= 3))
# difficulty and g-loading
# Scatter of item pass rate (`mean`) against factor loading; row names are
# moved into a `rowname` column so they can be used as case labels.
rownames_to_column(all_items_stats$itemstats) %>%
  GG_scatter("mean", "g_loading", case_names = "rowname") +
  labs(x = "Pass rate", y = "Factor loading")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
# Merge the g score back into the main dataset: the first column of the factor
# score matrix is paired with the ids of the scored item data, then joined
# onto `d` by participant id.
d2 = d %>%
  left_join(
    tibble(
      id = all_items_scored$id,
      g = all_items_fit_scores[, 1]
    ),
    by = join_by(Participant_id == id)
  )

# The join must not have produced repeated participants
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
# Re-standardize g for age, white subset.
# Step 1: age-adjust g by taking the residuals from a regression of g on a
# restricted cubic spline of age (knot count left at the rcs() default).
d2$g_ageadj = resid(ols(g ~  rcs(age), data = d2)) %>% unname()
## number of knots in rcs defaulting to 5
# Step 2: standardize the age-adjusted score, passing d2$white_only as the
# focal group. This previously standardized raw d2$g, which silently discarded
# the age adjustment from step 1.
d2$g_ageadj_z = standardize(d2$g_ageadj, focal_group = d2$white_only)

Good items

Drop a few bad items

# Show the items whose factor loading is below .25 ...
filter(all_items_stats$itemstats, g_loading < .25)
# ... and remove those same items (identified by row name) from the scored data
good_items_scored = select(
  all_items_scored,
  -all_of(rownames(filter(all_items_stats$itemstats, g_loading < .25)))
)

# Refit the one-factor 2PL model on the retained items (id column removed).
# The result goes through cache_object() with a fixed file name; `renew_all`
# is passed as its `renew` flag (presumably forcing a re-estimate -- confirm).
# NOTE(review): the printed fit reports "FAILED TO CONVERGE within 1e-04
# tolerance after 2000 EM iterations"; consider raising NCYCLES and renewing
# the cache before relying on these estimates.
good_items_fit = cache_object(
  expr = mirt(
    good_items_scored %>% select(-id),
    model = 1,
    itemtype = "2PL",
    technical = list(NCYCLES = 2000)
  ),
  filename = "good_items_fit.rds",
  renew = renew_all
)
## Cache found, reading object from disk
# Print the refitted model object (call, convergence status, fit indices)
good_items_fit
## 
## Call:
## mirt(data = good_items_scored %>% select(-id), model = 1, itemtype = "2PL", 
##     technical = list(NCYCLES = 2000))
## 
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 2000 EM iterations.
## mirt version: 1.41.8 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -43177
## Estimated parameters: 452 
## AIC = 87258
## BIC = 89162; SABIC = 87727
# Print the factor loadings (F1) and communalities (h2) of the refitted model
good_items_fit %>% summary()
##                                       F1     h2
## a_specific_number                  0.541 0.2927
## a_type_of_brass_instrument         0.875 0.7648
## a_type_of_drapery                  0.713 0.5084
## a_type_of_fabric_101               0.799 0.6387
## a_type_of_fabric_98                0.675 0.4552
## a_type_of_grasshopper              0.793 0.6295
## a_type_of_hat                      0.501 0.2513
## a_type_of_heating_unit             0.614 0.3776
## a_type_of_magistrate_position      0.813 0.6616
## a_type_of_mathematical_operation   0.653 0.4260
## a_type_of_overshoe                 0.472 0.2231
## a_type_of_plant                    0.786 0.6174
## a_type_of_religious_teacher        0.792 0.6273
## a_type_of_rock                     0.507 0.2572
## a_type_of_skirts                   0.692 0.4794
## a_type_of_smokeless_powder         0.536 0.2872
## a_type_of_sound                    0.502 0.2522
## a_type_of_sword                    0.383 0.1468
## a_type_of_volcanic_crater          0.682 0.4645
## a_type_of_wavy_form                0.722 0.5206
## actionable_negligence              0.822 0.6758
## advisory                           0.801 0.6413
## advocate                           0.629 0.3956
## almighty                           0.921 0.8475
## amazement                          0.810 0.6559
## amenability                        0.804 0.6472
## aroma                              0.921 0.8476
## auspices                           0.602 0.3629
## avoid                              0.910 0.8282
## barely_able_to_read_and_write      0.857 0.7346
## because                            0.782 0.6123
## berate                             0.789 0.6218
## blade                              0.790 0.6238
## blend                              0.735 0.5408
## blunder                            0.887 0.7871
## bow                                0.758 0.5745
## brief                              0.449 0.2020
## bring_about                        0.824 0.6795
## carelessly_or_hastily_put_together 0.716 0.5128
## celebration                        0.726 0.5265
## cheat                              0.796 0.6339
## cheerful                           0.977 0.9545
## clay_pigeon_shooting               0.812 0.6600
## coarse                             0.834 0.6953
## collect_or_study_insects           0.771 0.5946
## colossal                           0.728 0.5306
## commotion                          0.893 0.7968
## complainer                         0.812 0.6589
## confiscate                         0.844 0.7121
## congratulate                       0.583 0.3397
## construct                          0.994 0.9882
## contemplate                        0.622 0.3872
## convoy                             0.855 0.7303
## couch                              0.684 0.4681
## crease                             0.789 0.6232
## cunning                            0.696 0.4842
## deceit                             0.556 0.3096
## deliberately                       0.756 0.5719
## deprive                            0.654 0.4276
## detectable                         0.770 0.5931
## diatribes                          0.613 0.3763
## disjoined                          0.334 0.1113
## disrupt                            0.820 0.6729
## distinct                           0.351 0.1231
## divergence                         0.776 0.6023
## dome                               0.578 0.3336
## downfall                           0.805 0.6486
## drink                              0.822 0.6751
## elite                              0.748 0.5599
## embarrassment                      0.612 0.3745
## emphasize                          0.888 0.7878
## empire                             0.582 0.3386
## empty                              0.770 0.5923
## entanglement                       0.706 0.4988
## environment                        0.566 0.3209
## evil                               0.728 0.5304
## excite                             0.795 0.6316
## farewell                           0.940 0.8831
## flammable                          0.926 0.8572
## flatterer                          0.886 0.7849
## flatteries                         0.586 0.3435
## forever                            0.817 0.6668
## frequent                           0.621 0.3855
## gigantic                           0.854 0.7298
## girl                               0.774 0.5987
## goo                                0.870 0.7563
## goodbye                            0.896 0.8033
## greed                              0.818 0.6683
## groan                              0.843 0.7102
## gruesome                           0.707 0.4992
## guarantee                          0.514 0.2646
## gutter                             0.822 0.6761
## harmfulness                        0.600 0.3598
## hobby                              0.772 0.5957
## hut                                0.851 0.7240
## illness                            0.516 0.2661
## impromptu                          0.621 0.3861
## indescribable                      0.833 0.6947
## intellectual                       0.758 0.5747
## jargon                             0.950 0.9027
## knowledgeable                      0.832 0.6925
## lackadaisical                      0.559 0.3125
## manager                            0.906 0.8204
## meal                               0.739 0.5455
## melodic                            0.734 0.5381
## mutually                           0.881 0.7770
## nonsense                           0.875 0.7661
## nonsensical                        0.917 0.8409
## not_coveted                        0.701 0.4919
## pamper                             0.564 0.3186
## penitentiary                       0.714 0.5102
## perplexing                         0.731 0.5348
## persistence                        0.739 0.5461
## predetermine                       0.700 0.4899
## pretender                          0.918 0.8425
## questioning                        0.749 0.5607
## quickly                            0.651 0.4233
## rebellious                         0.807 0.6512
## referee                            0.796 0.6344
## referendum                         0.475 0.2257
## relating_to_the_right              0.452 0.2045
## relevant                           0.736 0.5414
## remove                             0.996 0.9926
## respectful                         0.796 0.6343
## retailer                           0.627 0.3936
## retract                            0.643 0.4136
## ropes                              0.786 0.6173
## sag                                0.662 0.4389
## schemer                            0.634 0.4023
## seize                              0.780 0.6091
## sensitivity                        0.701 0.4918
## shadows                            0.609 0.3705
## silly                              0.957 0.9163
## sketch                             0.641 0.4106
## slang                              0.812 0.6589
## slave                              0.430 0.1850
## sluggish                           0.699 0.4891
## somber                             0.560 0.3133
## spinelessness                      0.670 0.4493
## sporadic                           0.835 0.6980
## squad                              0.942 0.8866
## stagger                            0.727 0.5287
## stinking                           0.894 0.7999
## stroll                             0.850 0.7231
## stubborn_100                       0.670 0.4492
## stubborn_143                       0.708 0.5008
## stylish                            0.630 0.3963
## summit                             0.742 0.5501
## terminology                        0.814 0.6630
## the_science_of_speech_sounds       0.867 0.7522
## transportation                     0.385 0.1483
## tyrant                             0.411 0.1689
## unhealthful                        0.402 0.1619
## vile                               0.899 0.8088
## vulgar                             0.745 0.5550
## wandering                          0.416 0.1733
## warning                            0.749 0.5614
## wave                               0.737 0.5433
## weaponry                           0.673 0.4531
## 1of5_1                             0.346 0.1195
## 1of5_3                             0.270 0.0729
## 1of5_4                             0.274 0.0753
## 2of5_1                             0.439 0.1927
## 2of5_2                             0.523 0.2737
## 2of5_3                             0.598 0.3575
## 2of5_4                             0.600 0.3599
## 2of5_5                             0.543 0.2948
## 2of5_6                             0.747 0.5579
## 2of5_7                             0.371 0.1373
## 2of5_8                             0.583 0.3399
## 2of5_9                             0.582 0.3384
## 2of5_10                            0.345 0.1190
## 2of5_11                            0.398 0.1585
## 2of5_12                            0.407 0.1653
## 2of5_13                            0.494 0.2436
## 2of5_14                            0.320 0.1024
## 2of5_15                            0.679 0.4611
## 2of5_16                            0.534 0.2849
## 2of5_17                            0.697 0.4863
## 2of5_18                            0.722 0.5215
## 2of5_19                            0.474 0.2242
## 2of5_20                            0.338 0.1141
## 2of5_21                            0.526 0.2766
## 2of5_22                            0.847 0.7172
## 2of5_23                            0.322 0.1035
## 2of5_25                            0.732 0.5352
## 2of5_26                            0.708 0.5014
## 2of5_27                            0.551 0.3037
## 2of5_28                            0.332 0.1100
## 2of5_29                            0.565 0.3188
## 2of5_30                            0.589 0.3466
## 2of5_31                            0.549 0.3013
## 2of5_32                            0.500 0.2504
## 2of5_33                            0.317 0.1002
## 2of5_34                            0.438 0.1921
## 2of5_35                            0.311 0.0970
## 2of5_36                            0.809 0.6546
## 2of5_37                            0.898 0.8067
## 3of5_1                             0.261 0.0680
## 3of5_2                             0.458 0.2098
## 3of5_3                             0.481 0.2309
## 3of5_4                             0.688 0.4729
## 3of5_5                             0.376 0.1413
## 3of5_6                             0.733 0.5374
## 3of5_7                             0.567 0.3212
## 3of5_10                            0.820 0.6725
## 3of5_11                            0.698 0.4877
## 3of5_12                            0.553 0.3054
## 3of5_13                            0.321 0.1028
## 3of5_14                            0.512 0.2624
## 3of5_16                            0.395 0.1562
## 3of5_17                            0.430 0.1845
## 3of5_18                            0.364 0.1323
## 3of5_20                            0.538 0.2891
## 3of5_21                            0.449 0.2016
## 3of5_22                            0.467 0.2184
## 3of5_23                            0.864 0.7471
## 3of5_24                            0.587 0.3443
## 3of5_25                            0.534 0.2848
## 3of5_26                            0.687 0.4724
## 3of5_27                            0.562 0.3154
## 3of5_28                            0.442 0.1957
## 3of5_29                            0.585 0.3422
## 3of5_30                            0.504 0.2537
## 3of5_31                            0.465 0.2166
## 3of5_32                            0.617 0.3808
## 
## SS loadings:  107 
## Proportion Var:  0.475 
## 
## Factor correlations: 
## 
##    F1
## F1  1
# Item statistics for the retained items (id column removed), with each item's
# loading from the refitted model added as `g_loading`. The summary() call
# also prints the loading table as a side effect.
good_items_stats = itemstats(select(good_items_scored, -id))
good_items_stats$itemstats$g_loading = as.vector(summary(good_items_fit)$rotF)
##                                       F1     h2
## a_specific_number                  0.541 0.2927
## a_type_of_brass_instrument         0.875 0.7648
## a_type_of_drapery                  0.713 0.5084
## a_type_of_fabric_101               0.799 0.6387
## a_type_of_fabric_98                0.675 0.4552
## a_type_of_grasshopper              0.793 0.6295
## a_type_of_hat                      0.501 0.2513
## a_type_of_heating_unit             0.614 0.3776
## a_type_of_magistrate_position      0.813 0.6616
## a_type_of_mathematical_operation   0.653 0.4260
## a_type_of_overshoe                 0.472 0.2231
## a_type_of_plant                    0.786 0.6174
## a_type_of_religious_teacher        0.792 0.6273
## a_type_of_rock                     0.507 0.2572
## a_type_of_skirts                   0.692 0.4794
## a_type_of_smokeless_powder         0.536 0.2872
## a_type_of_sound                    0.502 0.2522
## a_type_of_sword                    0.383 0.1468
## a_type_of_volcanic_crater          0.682 0.4645
## a_type_of_wavy_form                0.722 0.5206
## actionable_negligence              0.822 0.6758
## advisory                           0.801 0.6413
## advocate                           0.629 0.3956
## almighty                           0.921 0.8475
## amazement                          0.810 0.6559
## amenability                        0.804 0.6472
## aroma                              0.921 0.8476
## auspices                           0.602 0.3629
## avoid                              0.910 0.8282
## barely_able_to_read_and_write      0.857 0.7346
## because                            0.782 0.6123
## berate                             0.789 0.6218
## blade                              0.790 0.6238
## blend                              0.735 0.5408
## blunder                            0.887 0.7871
## bow                                0.758 0.5745
## brief                              0.449 0.2020
## bring_about                        0.824 0.6795
## carelessly_or_hastily_put_together 0.716 0.5128
## celebration                        0.726 0.5265
## cheat                              0.796 0.6339
## cheerful                           0.977 0.9545
## clay_pigeon_shooting               0.812 0.6600
## coarse                             0.834 0.6953
## collect_or_study_insects           0.771 0.5946
## colossal                           0.728 0.5306
## commotion                          0.893 0.7968
## complainer                         0.812 0.6589
## confiscate                         0.844 0.7121
## congratulate                       0.583 0.3397
## construct                          0.994 0.9882
## contemplate                        0.622 0.3872
## convoy                             0.855 0.7303
## couch                              0.684 0.4681
## crease                             0.789 0.6232
## cunning                            0.696 0.4842
## deceit                             0.556 0.3096
## deliberately                       0.756 0.5719
## deprive                            0.654 0.4276
## detectable                         0.770 0.5931
## diatribes                          0.613 0.3763
## disjoined                          0.334 0.1113
## disrupt                            0.820 0.6729
## distinct                           0.351 0.1231
## divergence                         0.776 0.6023
## dome                               0.578 0.3336
## downfall                           0.805 0.6486
## drink                              0.822 0.6751
## elite                              0.748 0.5599
## embarrassment                      0.612 0.3745
## emphasize                          0.888 0.7878
## empire                             0.582 0.3386
## empty                              0.770 0.5923
## entanglement                       0.706 0.4988
## environment                        0.566 0.3209
## evil                               0.728 0.5304
## excite                             0.795 0.6316
## farewell                           0.940 0.8831
## flammable                          0.926 0.8572
## flatterer                          0.886 0.7849
## flatteries                         0.586 0.3435
## forever                            0.817 0.6668
## frequent                           0.621 0.3855
## gigantic                           0.854 0.7298
## girl                               0.774 0.5987
## goo                                0.870 0.7563
## goodbye                            0.896 0.8033
## greed                              0.818 0.6683
## groan                              0.843 0.7102
## gruesome                           0.707 0.4992
## guarantee                          0.514 0.2646
## gutter                             0.822 0.6761
## harmfulness                        0.600 0.3598
## hobby                              0.772 0.5957
## hut                                0.851 0.7240
## illness                            0.516 0.2661
## impromptu                          0.621 0.3861
## indescribable                      0.833 0.6947
## intellectual                       0.758 0.5747
## jargon                             0.950 0.9027
## knowledgeable                      0.832 0.6925
## lackadaisical                      0.559 0.3125
## manager                            0.906 0.8204
## meal                               0.739 0.5455
## melodic                            0.734 0.5381
## mutually                           0.881 0.7770
## nonsense                           0.875 0.7661
## nonsensical                        0.917 0.8409
## not_coveted                        0.701 0.4919
## pamper                             0.564 0.3186
## penitentiary                       0.714 0.5102
## perplexing                         0.731 0.5348
## persistence                        0.739 0.5461
## predetermine                       0.700 0.4899
## pretender                          0.918 0.8425
## questioning                        0.749 0.5607
## quickly                            0.651 0.4233
## rebellious                         0.807 0.6512
## referee                            0.796 0.6344
## referendum                         0.475 0.2257
## relating_to_the_right              0.452 0.2045
## relevant                           0.736 0.5414
## remove                             0.996 0.9926
## respectful                         0.796 0.6343
## retailer                           0.627 0.3936
## retract                            0.643 0.4136
## ropes                              0.786 0.6173
## sag                                0.662 0.4389
## schemer                            0.634 0.4023
## seize                              0.780 0.6091
## sensitivity                        0.701 0.4918
## shadows                            0.609 0.3705
## silly                              0.957 0.9163
## sketch                             0.641 0.4106
## slang                              0.812 0.6589
## slave                              0.430 0.1850
## sluggish                           0.699 0.4891
## somber                             0.560 0.3133
## spinelessness                      0.670 0.4493
## sporadic                           0.835 0.6980
## squad                              0.942 0.8866
## stagger                            0.727 0.5287
## stinking                           0.894 0.7999
## stroll                             0.850 0.7231
## stubborn_100                       0.670 0.4492
## stubborn_143                       0.708 0.5008
## stylish                            0.630 0.3963
## summit                             0.742 0.5501
## terminology                        0.814 0.6630
## the_science_of_speech_sounds       0.867 0.7522
## transportation                     0.385 0.1483
## tyrant                             0.411 0.1689
## unhealthful                        0.402 0.1619
## vile                               0.899 0.8088
## vulgar                             0.745 0.5550
## wandering                          0.416 0.1733
## warning                            0.749 0.5614
## wave                               0.737 0.5433
## weaponry                           0.673 0.4531
## 1of5_1                             0.346 0.1195
## 1of5_3                             0.270 0.0729
## 1of5_4                             0.274 0.0753
## 2of5_1                             0.439 0.1927
## 2of5_2                             0.523 0.2737
## 2of5_3                             0.598 0.3575
## 2of5_4                             0.600 0.3599
## 2of5_5                             0.543 0.2948
## 2of5_6                             0.747 0.5579
## 2of5_7                             0.371 0.1373
## 2of5_8                             0.583 0.3399
## 2of5_9                             0.582 0.3384
## 2of5_10                            0.345 0.1190
## 2of5_11                            0.398 0.1585
## 2of5_12                            0.407 0.1653
## 2of5_13                            0.494 0.2436
## 2of5_14                            0.320 0.1024
## 2of5_15                            0.679 0.4611
## 2of5_16                            0.534 0.2849
## 2of5_17                            0.697 0.4863
## 2of5_18                            0.722 0.5215
## 2of5_19                            0.474 0.2242
## 2of5_20                            0.338 0.1141
## 2of5_21                            0.526 0.2766
## 2of5_22                            0.847 0.7172
## 2of5_23                            0.322 0.1035
## 2of5_25                            0.732 0.5352
## 2of5_26                            0.708 0.5014
## 2of5_27                            0.551 0.3037
## 2of5_28                            0.332 0.1100
## 2of5_29                            0.565 0.3188
## 2of5_30                            0.589 0.3466
## 2of5_31                            0.549 0.3013
## 2of5_32                            0.500 0.2504
## 2of5_33                            0.317 0.1002
## 2of5_34                            0.438 0.1921
## 2of5_35                            0.311 0.0970
## 2of5_36                            0.809 0.6546
## 2of5_37                            0.898 0.8067
## 3of5_1                             0.261 0.0680
## 3of5_2                             0.458 0.2098
## 3of5_3                             0.481 0.2309
## 3of5_4                             0.688 0.4729
## 3of5_5                             0.376 0.1413
## 3of5_6                             0.733 0.5374
## 3of5_7                             0.567 0.3212
## 3of5_10                            0.820 0.6725
## 3of5_11                            0.698 0.4877
## 3of5_12                            0.553 0.3054
## 3of5_13                            0.321 0.1028
## 3of5_14                            0.512 0.2624
## 3of5_16                            0.395 0.1562
## 3of5_17                            0.430 0.1845
## 3of5_18                            0.364 0.1323
## 3of5_20                            0.538 0.2891
## 3of5_21                            0.449 0.2016
## 3of5_22                            0.467 0.2184
## 3of5_23                            0.864 0.7471
## 3of5_24                            0.587 0.3443
## 3of5_25                            0.534 0.2848
## 3of5_26                            0.687 0.4724
## 3of5_27                            0.562 0.3154
## 3of5_28                            0.442 0.1957
## 3of5_29                            0.585 0.3422
## 3of5_30                            0.504 0.2537
## 3of5_31                            0.465 0.2166
## 3of5_32                            0.617 0.3808
## 
## SS loadings:  107 
## Proportion Var:  0.475 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#add the fitted IRT item parameters to the item statistics table
#the simplified coefficient matrix is extracted once and reused for both columns
good_items_pars = coef(good_items_fit, simplify = TRUE)$items
#column 1 is stored as the item discrimination
good_items_stats$itemstats$discrim = unname(good_items_pars[, 1])
#column 2 is sign-flipped and stored as the item difficulty
good_items_stats$itemstats$difficulty = unname(-good_items_pars[, 2])
good_items_stats$itemstats
good_items_stats$itemstats %>% 
  describe2()
#difficulties
#distribution of item pass rates (the `mean` column of the item statistics)
#the x scale is replaced only to set the axis title, hence the scale messages below
good_items_stats$itemstats$mean %>% GG_denhist() +
  scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items pass rate.png")

#loadings
#distribution of the items' factor loadings (the `g_loading` column)
good_items_stats$itemstats$g_loading %>% GG_denhist() +
    scale_x_continuous("Factor loading")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items factor loading.png")

#reliability
#factor scores with their standard errors, then the empirical reliability
good_items_fit_scores = fscores(good_items_fit, full.scores.SE = TRUE)
empirical_rxx(good_items_fit_scores)
##    F1 
## 0.977
#reliability along the ability scale: which range has >.90?
rxx_info = get_reliabilities(good_items_fit)
d_rxx_info = rxx_info

#summaries of the ability levels where reliability exceeds .90 and .80
describe2(filter(d_rxx_info, rel > .90))
describe2(filter(d_rxx_info, rel > .80))

#reliability curve, display limited to z between -4 and 4
ggplot(d_rxx_info, aes(x = z, y = rel)) +
  geom_line() +
  scale_x_continuous("Ability level (z)") +
  scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
  coord_cartesian(xlim = c(-4, 4))

GG_save("figs/good items reliability as function of ability.png")

#summaries within +-2 and +-3 z of the mean
describe2(filter(d_rxx_info, between(z, -2, 2)))
describe2(filter(d_rxx_info, between(z, -3, 3)))
#difficulty and g-loading
#item names are moved into a `rowname` column so they can label the points
good_items_stats$itemstats %>% 
  rownames_to_column() %>% 
  GG_scatter("mean", "g_loading", case_names = "rowname") +
  labs(x = "Pass rate", y = "Factor loading")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/good items scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
#scores are keyed by the id column of the scored items and matched to Participant_id
d2 = d %>% 
  left_join(
    tibble(
      id = good_items_scored$id, 
      g = good_items_fit_scores[, 1]
    ),
    by = c("Participant_id" = "id")
  )

#the join must not have duplicated any participant
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE

Norms

#the age problem
#scatter of the IRT score against age; the added geom_smooth() draws a second,
#loess-based curve (see the second message below)
GG_scatter(d2, "age", "g") +
  geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#scores by age group
#presumably splits age into 3 bins -- TODO confirm against discretize()
d2$age_group = discretize(d2$age, 3)

#boxplots of the IRT score per age group
GG_group_means(d2, "g", "age_group", type = "boxplot") +
  scale_y_continuous("Vocabulary IRT score") +
  scale_x_discrete("Age group")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items boxplots by age.png")

#descriptives of the IRT score within each age group
describe2(d2$g, d2$age_group)
## New names:
## • `` -> `...1`
#whites differ in age?
#descriptives of age split by the white_only flag
describe2(d2$age, d2$white_only)
## New names:
## • `` -> `...1`
#white subset is easier to work with
#manual walk-through of the age norming, done in three steps:
#1) remove the linear age trend in the mean, 2) rescale the residuals by the
#age-predicted spread, 3) restandardize to mean 0 / SD 1 within this subset
d2_white = d2 %>% filter(white_only)
#step 1 model: linear effect of age on the score (outer parentheses print the fit)
(white_ageadj_model = lm(g ~ age, data = d2_white))
## 
## Call:
## lm(formula = g ~ age, data = d2_white)
## 
## Coefficients:
## (Intercept)          age  
##     -1.1365       0.0264
#get resids, step 1
d2_white$g_ageadj1 = resid(white_ageadj_model)
(ageadj_desc_whites = describe2(d2_white$g_ageadj1))
#alternative is to just model the absolute resids directly
#step 2 model: absolute residual size as a linear function of age
(absSD_ols_whites = lm(abs(g_ageadj1) ~ age, data = d2_white))
## 
## Call:
## lm(formula = abs(g_ageadj1) ~ age, data = d2_white)
## 
## Coefficients:
## (Intercept)          age  
##     0.48643      0.00535
#get age mean and SD adjusted scores
#each residual is divided by the absolute residual predicted for that person's age
d2_white$g_ageadj2 = d2_white$g_ageadj1 / predict(absSD_ols_whites)

#does this work tho?
#visual check: the adjusted score should no longer trend with age
d2_white %>% 
  GG_scatter("age", "g_ageadj2")
## `geom_smooth()` using formula = 'y ~ x'

#heteroscedasticity check of the adjusted score against age
test_HS(d2_white$g_ageadj2, d2_white$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
#restore to white z score norms
#step 3: z-score using the mean and sd reported by describe2 for this subset
(white_desc_ageadj2_desc = describe2(d2_white$g_ageadj2))
d2_white$g_ageadj3 = (d2_white$g_ageadj2 - white_desc_ageadj2_desc$mean) / white_desc_ageadj2_desc$sd
d2_white$g_ageadj3 %>% describe2()
#Thus finally, we can make IQs using a function
#the norm group is flagged by race_combos_common == "white"
#NOTE(review): the manual steps earlier in this section filter on `white_only`
#instead -- confirm both flags select the same participants
vocab_norms = kirkegaard::make_norms(
  score = d2$g,
  age = d2$age,
  norm_group = d2$race_combos_common == "white"
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
#assumes vocab_norms$data keeps the row order of the inputs -- TODO confirm
d2$IQ = vocab_norms$data$IQ

#plot results to see if they make sense
#IQ distributions split by the white_only flag
d2 %>% 
  GG_denhist("IQ", "white_only")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/good items IQ scores by Whiteness.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#IQ descriptives split by the white_only flag
describe2(d2$IQ, d2$white_only)
## New names:
## • `` -> `...1`
#should be no age relationship within groups
#note this scatter uses the full sample and is not split by group
d2 %>% 
  GG_scatter("age", "IQ")
## `geom_smooth()` using formula = 'y ~ x'

#and no heteroscedasticity
test_HS(d2$IQ, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

Validation items

#recode the validation items from Yes/No answers to numeric 1/0
d2_mmpi = d2 %>% 
  select(I_am_easily_awakened_by_noise:I_like_movie_love_scenes) %>% 
  map_df(~as.numeric(mapvalues(., from = c("Yes", "No"), to = c(1, 0))))

#IQ means by MMPI
#one row per item: mean IQ of each answer group and the gap between them
MMPI_IQ_means = imap_dfr(d2_mmpi, function(answers, item_name) {
  iq_by_answer = suppressMessages(describe2(d2$IQ, group = answers))
  
  #positional lookup: first group is taken as "no" (0), second as "yes" (1)
  mean_no = iq_by_answer$mean[1]
  mean_yes = iq_by_answer$mean[2]
  gap = mean_yes - mean_no
  
  tibble(
    #question text comes from the variable dictionary
    question = d_vars %>% filter(var_name == item_name) %>% pull(label),
    yes = mean_yes,
    no = mean_no,
    IQ_gap = gap,
    abs_IQ_gap = abs(gap)
  )
})

#self-estimated number of correct answers against IQ
GG_scatter(d2, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "IQ") +
  scale_x_continuous("Answer to 'How many items on the test you just took do you think you got correct?'")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/self-estimate vs. IQ.png")
## `geom_smooth()` using formula = 'y ~ x'
#self-rated standing relative to other survey takers against IQ
GG_scatter(d2, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "IQ")
## `geom_smooth()` using formula = 'y ~ x'

#compare the two self-estimate correlations with IQ
#NOTE(review): both correlations involve d2$IQ and are computed on the same data
#frame, but no `yz` is passed, so the output below is labelled as a test of
#*independent* correlations. Consider also passing the correlation between the
#two self-estimates as `yz` -- confirm against the psych::paired.r documentation.
#NOTE(review): `n` is the pairwise count for IQ and the second self-estimate only;
#verify it also matches the first correlation's sample size.
paired.r(
  cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, use = "pair"),
  cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, use = "pair"),
  n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 1]
)
## Call: paired.r(xy = cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, 
##     use = "pair"), xz = cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, 
##     use = "pair"), n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 
##     1])
## [1] "test of difference between two independent correlations"
## z = 3.94  With probability =  0

Sex DIF

#sex bias
#DIF analysis by sex on the good items; the result is cached on disk and re-read
#on later runs (see the message below)
#assumes the rows of good_items_scored are in the same order as d2 -- TODO confirm,
#since d2 was built by joining the scores onto d by participant id
sex_bias_fit = cache_object(filename = "data/sex_bias_fit.rds", expr = DIF_test(
  items = good_items_scored %>% select(-id),
  model = 1,
  group = d2$sex,
  technical = list(NCYCLES = 2000)
))
## Cache found, reading object from disk
#DIF items
#items flagged at p < .05 without adjustment
sex_bias_fit$DIF_stats %>% filter(p < .05)
#items flagged at p_adj < .05
sex_bias_fit$DIF_stats %>% filter(p_adj < .05)
#test-level effect sizes for the liberal and conservative anchor sets
sex_bias_fit$effect_size_test
## $liberal
##           Effect Size    Value
## 1                STDS  0.09705
## 2                UTDS  4.73322
## 3              UETSDS  0.51663
## 4               ETSSD  0.00256
## 5         Starks.DTFR  0.14094
## 6               UDTFR  4.60979
## 7              UETSDN  0.54145
## 8 theta.of.max.test.D -2.01111
## 9           Test.Dmax -2.00227
## 
## $conservative
##           Effect Size   Value
## 1                STDS 0.15125
## 2                UTDS 0.97288
## 3              UETSDS 0.23760
## 4               ETSSD 0.00398
## 5         Starks.DTFR 0.17914
## 6               UDTFR 0.90075
## 7              UETSDN 0.24653
## 8 theta.of.max.test.D 0.51284
## 9           Test.Dmax 0.39618
#number the items by row so flagged items can be passed to `which.items` below
sex_bias_fit$DIF_stats$item_number = seq_along_rows(sex_bias_fit$DIF_stats)

#plot items
#trace plots for all items in the conservative-anchor fit
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace")

#trace plots for only the items with p_adj < .05
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))

#same plot again, this time written to file
save_plot_to_file({
  sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
}, filename = "figs/good items sex DIF.png")
#liberal-anchor fit, items with unadjusted p < .05
sex_bias_fit$fits$anchor_liberal %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p < .05) %>% pull(item_number))

#plot bias for test
sex_bias_fit$fits$anchor_conservative %>% plot(type = "score")

#sex difference
#standardized mean difference in IQ between the sexes, full sample
SMD_matrix(d2$IQ, d2$sex)
##           Male  Female
## Male        NA -0.0684
## Female -0.0684      NA
describe2(d2$IQ, d2$sex)
## New names:
## • `` -> `...1`
#F test for equal IQ variance between the sexes
var.test(IQ ~ sex, data = d2)
## 
##  F test to compare two variances
## 
## data:  IQ by sex
## F = 1, num df = 238, denom df = 259, p-value = 0.06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.995 1.638
## sample estimates:
## ratio of variances 
##               1.28
#item level pass rate SD by sex
#NOTE(review): the result is named good_items_* but is computed from
#all_items_scored, not good_items_scored -- confirm which item set is intended
#(the zero-SD warnings below show some item has no variance)
good_items_stats_sexes = itemstats(
  all_items_scored %>% select(-id),
  group = d2$sex
)
## Warning in cor(data, use = "pairwise.complete.obs"): the standard deviation is
## zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation
## is zero

## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation
## is zero
#stack the per-sex item statistics and compare the distributions of item SDs
bind_rows(
  good_items_stats_sexes$Male$itemstats %>% mutate(sex = "Men"),
  good_items_stats_sexes$Female$itemstats %>% mutate(sex = "Women")
) %>% 
  GG_denhist("sd", group = "sex")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

#Welch t-test on the item SDs, men vs. women (items are the units of analysis)
t.test(
  good_items_stats_sexes$Male$itemstats$sd,
  good_items_stats_sexes$Female$itemstats$sd
)
## 
##  Welch Two Sample t-test
## 
## data:  good_items_stats_sexes$Male$itemstats$sd and good_items_stats_sexes$Female$itemstats$sd
## t = 3, df = 441, p-value = 0.002
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.00992 0.04547
## sample estimates:
## mean of x mean of y 
##     0.416     0.388
#white subset
#repeat the sex comparison within participants coded "white" only
d2_white = filter(d2, race_combos_common == "white")

#standardized mean difference in IQ between the sexes
SMD_matrix(d2_white$IQ, d2_white$sex)
##         Male Female
## Male      NA  0.039
## Female 0.039     NA
describe2(d2_white$IQ, d2_white$sex)
## New names:
## • `` -> `...1`
#F test for equal IQ variance between the sexes
var.test(IQ ~ sex, data = d2_white)
## 
##  F test to compare two variances
## 
## data:  IQ by sex
## F = 1, num df = 171, denom df = 186, p-value = 0.1
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.957 1.725
## sample estimates:
## ratio of variances 
##               1.28
#IQ distributions by sex, reusing the subset built above
d2_white %>% 
  GG_denhist("IQ", "sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Abbreviation

#largest abbreviated scale to build and the EM cycle cap for every mirt fit
max_items = 50
max_cycles = 2000
#parallel backend used by the abbreviation runs
plan(multisession, workers = 7)

#mirt settings shared by all abbreviation runs below, defined once so the
#runs cannot drift apart
abbrev_mirt_args = list(
  model = 1,
  itemtype = "2PL",
  verbose = FALSE,
  technical = list(NCYCLES = max_cycles)
)

#forward optimize for reliability
#cached on disk; recomputed only when renew_all is set
vocab_abbrev_forward = cache_object(filename = "data/vocab_abbrev_rc50.rds", expr = {
  abbreviate_scale(
    items = good_items_scored %>% select(-id),
    item_target = max_items,
    method = "forwards",
    selection_method = "rc",
    mirt_args = abbrev_mirt_args
  )
}, renew = renew_all)
## Cache found, reading object from disk
#max loading
#simple
max_loading_basic = abbreviate_scale(
  items = good_items_scored %>% select(-id),
  item_target = max_items,
  method = "max_loading",
  selection_method = "rc",
  mirt_args = abbrev_mirt_args
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## EM cycles terminated after 2000 iterations.
## 127.639 sec elapsed
#balancing
#same as the simple run, with items balanced across 5 difficulty groups
max_loading_balanced = abbreviate_scale(
  items = good_items_scored %>% select(-id),
  item_target = max_items,
  method = "max_loading",
  difficulty_balance_groups = 5,
  selection_method = "rc",
  mirt_args = abbrev_mirt_args
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## 99.324 sec elapsed
#residualization
#same as the simple run, with residualized loadings
max_loading_resid = abbreviate_scale(
  items = good_items_scored %>% select(-id),
  item_target = max_items,
  method = "max_loading",
  residualize_loadings = TRUE,
  selection_method = "rc",
  mirt_args = abbrev_mirt_args
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## EM cycles terminated after 2000 iterations.
## 97.404 sec elapsed
#plot results
#stack the best sets of each method, labelled by method, and reshape so each
#criterion becomes its own facet
abbrev_results = list(
  "step forward" = vocab_abbrev_forward,
  "max loading, basic" = max_loading_basic,
  "max loading, balanced" = max_loading_balanced,
  "max loading, resid" = max_loading_resid
) %>% 
  imap_dfr(~mutate(.x$best_sets, method = .y)) %>% 
  select(reliability, r_full_score, method, items_in_scale, criterion_value) %>% 
  pivot_longer(
    cols = c(reliability, r_full_score, criterion_value),
    names_to = "criterion",
    values_to = "value"
  ) %>% 
  mutate(
    #display labels for the three criteria
    criterion = unname(c(
      reliability = "Reliability",
      r_full_score = "Cor. with full score",
      criterion_value = "Combined index"
    )[criterion])
  )

#one line per method, one panel per criterion, y axis fixed to 0-1
#(points are intentionally not drawn)
ggplot(abbrev_results, aes(x = items_in_scale, y = value, color = method)) +
  geom_line() +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .1)) +
  facet_wrap(vars(criterion))

GG_save("figs/abbreviation_comparison.png")

#make norms for abbreviated scales
#scale lengths to norm; each uses the best forward-search set of that size
abbrev_scales_items = c(10, 15, 20, 25, 30)
abbrev_scales_norms = abbrev_scales_items %>% 
  map(function(item_count) {
    #row of the forward search holding the best set with this many items
    best_set = filter(vocab_abbrev_forward$best_sets, items_in_scale == item_count)
    
    make_norms(
      #first column of the first stored score object for that set
      score = best_set[["scores"]][[1]][, 1],
      age = d2$age,
      #NOTE(review): the full-scale norms use race_combos_common == "white" as
      #the norm group -- confirm white_only selects the same participants
      norm_group = d2$white_only,
      p_value = .05
    )
  }) %>% 
  set_names("scale_" + abbrev_scales_items)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.035). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.034). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = <0.001***). Model used.
#move scores into main dataset
#one vocab_IQ_<n> column per abbreviated scale
for (scale in abbrev_scales_items) {
  d2[["vocab_IQ_" + scale]] = abbrev_scales_norms[["scale_" + scale]][["data"]][["IQ"]]
}

#verify that age norming was done correctly by checking for age effects and white mean/SD
#only the 30-item scale is checked here


#IQ descriptives split by the white_only flag
describe2(d2$vocab_IQ_30, d2$white_only)
## New names:
## • `` -> `...1`
#linear effect of age?
GG_scatter(d2, "age", "vocab_IQ_30")
## `geom_smooth()` using formula = 'y ~ x'

#and no heteroscedasticity
test_HS(d2$vocab_IQ_30, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

Meta

#versions
#prints the session information shown below
#presumably also writes it to a file (the OSF upload at the end of this file
#lists "sessions_info.txt") -- TODO confirm against write_sessioninfo()
write_sessioninfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] furrr_0.3.1           future_1.33.2         ggeffects_1.5.1      
##  [4] rms_6.8-0             googlesheets4_1.1.1   mirt_1.41.8          
##  [7] lattice_0.22-5        readxl_1.4.3          kirkegaard_2024-04-23
## [10] psych_2.4.3           assertthat_0.2.1      weights_1.0.4        
## [13] Hmisc_5.1-2           magrittr_2.0.3        lubridate_1.9.3      
## [16] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [19] purrr_1.0.2           readr_2.1.5           tidyr_1.3.1          
## [22] tibble_3.2.1          ggplot2_3.5.0         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.16.0    jsonlite_1.8.8       shape_1.4.6.1       
##   [4] TH.data_1.1-2        jomo_2.7-6           farver_2.1.1        
##   [7] nloptr_2.0.3         rmarkdown_2.26       ragg_1.3.0          
##  [10] fs_1.6.3             vctrs_0.6.5          minqa_1.2.6         
##  [13] base64enc_0.1-3      htmltools_0.5.8.1    polspline_1.1.24    
##  [16] tictoc_1.2.1         broom_1.0.5          cellranger_1.1.0    
##  [19] Formula_1.2-5        mitml_0.4-5          dcurver_0.9.2       
##  [22] sass_0.4.9           parallelly_1.37.1    bslib_0.7.0         
##  [25] htmlwidgets_1.6.4    plyr_1.8.9           sandwich_3.1-0      
##  [28] zoo_1.8-12           cachem_1.0.8         lifecycle_1.0.4     
##  [31] iterators_1.0.14     pkgconfig_2.0.3      Matrix_1.6-5        
##  [34] R6_2.5.1             fastmap_1.1.1        digest_0.6.35       
##  [37] colorspace_2.1-0     textshaping_0.3.7    vegan_2.6-4         
##  [40] labeling_0.4.3       fansi_1.0.6          timechange_0.3.0    
##  [43] gdata_3.0.0          mgcv_1.9-1           compiler_4.3.3      
##  [46] gargle_1.5.2         bit64_4.0.5          withr_3.0.0         
##  [49] htmlTable_2.4.2      backports_1.4.1      highr_0.10          
##  [52] pan_1.9              MASS_7.3-60          quantreg_5.97       
##  [55] GPArotation_2024.3-1 gtools_3.9.5         permute_0.9-7       
##  [58] tools_4.3.3          foreign_0.8-86       googledrive_2.1.1   
##  [61] nnet_7.3-19          glue_1.7.0           nlme_3.1-163        
##  [64] grid_4.3.3           checkmate_2.3.1      cluster_2.1.6       
##  [67] generics_0.1.3       gtable_0.3.4         tzdb_0.4.0          
##  [70] data.table_1.15.4    hms_1.1.3            Deriv_4.1.3         
##  [73] utf8_1.2.4           foreach_1.5.2        pillar_1.9.0        
##  [76] vroom_1.6.5          splines_4.3.3        survival_3.5-8      
##  [79] bit_4.0.5            SparseM_1.81         tidyselect_1.2.1    
##  [82] pbapply_1.7-2        knitr_1.45           gridExtra_2.3       
##  [85] xfun_0.43            stringi_1.8.3        yaml_2.3.8          
##  [88] boot_1.3-30          evaluate_0.23        codetools_0.2-19    
##  [91] cli_3.6.2            rpart_4.1.23         systemfonts_1.0.6   
##  [94] munsell_0.5.1        jquerylib_0.1.4      Rcpp_1.0.12         
##  [97] globals_0.16.3       parallel_4.3.3       MatrixModels_0.5-3  
## [100] lme4_1.1-35.2        listenv_0.9.1        glmnet_4.1-8        
## [103] mvtnorm_1.2-4        scales_1.3.0         insight_0.19.10     
## [106] crayon_1.5.2         rlang_1.1.3          multcomp_1.4-25     
## [109] mnormt_2.1.1         mice_3.16.0
#write main data to file for reuse
write_rds(d2, "data/main data.rds", compress = "xz")

#save scored items
write_rds(good_items_scored, "data/item data.rds", compress = "xz")

#save norms
#full-scale norms first, then the list of abbreviated-scale norms
write_rds(vocab_norms, "data/vocab norms.rds", compress = "xz")
write_rds(abbrev_scales_norms, "data/vocab abbrev norms.rds", compress = "xz")


#OSF
#manual upload step: disabled by default, run the body by hand when publishing
if (FALSE) {
  library(osfr)
  
  #login
  #the access token is read from a local file so it never appears in this script
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/6gcy4/")
  
  #upload all files in project
  #overwrite existing (versioning)
  #figures are saved under "figs/" throughout this script, so that is the
  #directory uploaded here
  osf_upload(
    osf_proj,
    path = c("data", "figs", "papers", "vocab.Rmd", "vocab.html", "sessions_info.txt"), 
    conflicts = "overwrite"
  )
}