This analysis removes bad data: 58 individuals who failed at least one of the 4 attention-check items or one of 4 exceedingly easy items, reflecting low motivation and/or poor English. These 8 items are also removed from the analysis. Results are similar to those of the previous analysis, which included all individuals.

Data preparation

Sys.setenv(LANG = "en") # use English for R messages

library(ggpubr)
library(kirkegaard)
library(tictoc)
load_packages(
readxl,
mirt,
googlesheets4,
rms,
ggeffects,
future, furrr
)

theme_set(theme_bw())

options(
digits = 3
)

mirtCluster()
## mirtCluster() previously defined for 31 clusters
plan(multisession(workers = 7))

#delete the cache and re-run everything (set to T if you have run this code before; otherwise the cache will contain outdated results)
renew_all = F

# Alternatively, clear the cache manually:
# List of RDS files to be cleared
# rds_files <- c(
# "data/vocab_irt.rds",
# "data/hard_items_fit.rds",
# "data/all_items_fit.rds",
# "data/good_items_fit.rds",
# "data/sex_bias_fit.rds",
# "data/vocab_abbrev_rc50.rds"
# )

# Function to remove RDS files if they exist
clear_rds_files <- function(files) {
  for (file in files) {
    if (file.exists(file)) {
      file.remove(file)
      message(paste("Removed:", file))
    } else {
      message(paste("File not found:", file))
    }
  }
}

# Clear the RDS files
# clear_rds_files(rds_files)
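# A possible tie-in to the renew_all flag above (hypothetical, not part of the
# original pipeline; requires the rds_files vector to be uncommented first):
# if (renew_all) clear_rds_files(rds_files)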

Functions

#coverage of the trait distribution where reliability meets a given minimum
reliability_range = function(x, min_reliability) {
  map_df(min_reliability, \(m) {
    x %>% 
    filter(rel >= m) %$%
    tibble(
      min_reliability = m,
      lower_z = min(z),
      upper_z = max(z),
      coverage = pnorm(upper_z) - pnorm(lower_z)
    )
  })
}
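# Hypothetical usage sketch (not part of the original analysis): reliability_range()
# expects a data frame with a `z` column (trait level) and a `rel` column
# (conditional reliability at that level), and returns the z-range and normal
# population coverage meeting each reliability threshold.
example_rel = tibble(z = seq(-4, 4, by = 0.1), rel = 0.6 + 0.35 * dnorm(z) / dnorm(0))
example_rel_ranges = reliability_range(example_rel, min_reliability = c(.80, .90))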

#find item choices based on item names
find_items = function(x) {
  
  items = map_chr(x, function(y) {
    #item is from different objects depending on the name
    if (!str_detect(y, "of5_")) {
      easy_items_choices[y]
    } else if (str_detect(y, "1of5_")) {
      hard_items_1of5_options[y]
    } else if (str_detect(y, "2of5_")) {
      hard_items_2of5_options[y]
    } else if (str_detect(y, "3of5_")) {
      hard_items_3of5_options[y]
    }
  })
  
  names(items) = x
  
  items
}

#items with "xof5" in their name come from the hard pool
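# Hypothetical usage sketch: find_items() assumes named vectors
# easy_items_choices, hard_items_1of5_options, hard_items_2of5_options, and
# hard_items_3of5_options exist (defined elsewhere in the project), each mapping
# an item name to its answer options. E.g.:
# find_items(c("commotion", "2of5_17", "3of5_4"))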

Data

if (F) {
d_prolific = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\prolific_export_64877cb0ca7d649ce538e74d.csv") %>% df_legalize_names()
d_alchemer = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\20230618204902-SurveyExport.csv") %>% df_legalize_names() %>% filter(Status == "Complete")

assert_that(!any(duplicated(d_prolific$Participant_id)))
assert_that(!any(duplicated(d_alchemer$Write_in_your_Prolific_ID)))
d_alchemer$Write_in_your_Prolific_ID %>% table2()

#join on prolific id
d = inner_join(
d_prolific,
d_alchemer,
by = c("Participant_id" = "Write_in_your_Prolific_ID")
)

assert_that(!any(duplicated(d$Participant_id)))

d %>%
select(
-Submission_id,
-IP_Address,
-Referer,
-SessionID,
-User_Agent
) %>%
write_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\raw data.rds", compress = "xz")
}

#load prepared data without sensitive information
d = read_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\raw data.rds")

d$native_speaker = d$Is_English_one_of_your_native_languages == "Yes"
table2(d$native_speaker)
## # A tibble: 3 × 3
##   Group Count Percent
##   <chr> <dbl>   <dbl>
## 1 TRUE    491   98.4 
## 2 FALSE     8    1.60
## 3 <NA>      0    0
#recode covariates
d$age = d$How_old_are_you %>% str_match("\\d+") %>% as.numeric()
d$sex = d$What_is_your_biological_sex %>% mapvalues(from = c("Male (Y chromosome)", "Female (no Y chromosome)"), to = c("Male", "Female")) %>% factor(levels = c("Male", "Female"))

ethnicity_vars = d %>% select(White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with:Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with) %>% names()
d$white = d$White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$jewish = d$Jewish_Ashkenazi_Sephardic_Misrahi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$black = d$Black_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$hispanic = d$Hispanic_Latino_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$middle_eastern = d$Middle_Eastern_North_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$south_asian = d$South_Asian_Indian_subcontinent_excluding_Pakistani_and_Bangladeshi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$southest_asian = d$Southeast_Asian_Bangladeshi_Vietnamese_Burmese_etc_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$east_asian = d$East_Asian_Chinese_Korean_Japanese_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$polynesian = d$Polynesian_Pacific_Islander_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$native_american = d$Native_American_Amerindian_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$other_race = d$Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
ethnicity_simple_vars = d %>% select(white:other_race) %>% names() %>% factor()
d$white_only = as.logical(d$white) & (d %>% select(white:other_race) %>% select(-white) %>% map_df(as.logical) %>% rowSums() %>% equals(0))
d$black_only = as.logical(d$black) & (d %>% select(white:other_race) %>% select(-black) %>% map_df(as.logical) %>% rowSums() %>% equals(0))

#combinations
d$race_combos = d %>% select(white:other_race) %>% encode_combinations()
d$race_combos %>% table2()
## # A tibble: 24 × 3
##    Group                  Count Percent
##    <chr>                  <dbl>   <dbl>
##  1 white                    359  71.9  
##  2 black                     63  12.6  
##  3 east_asian                19   3.81 
##  4 hispanic                  14   2.81 
##  5 white, hispanic            9   1.80 
##  6 south_asian                5   1.00 
##  7 white, jewish              5   1.00 
##  8 southest_asian             4   0.802
##  9 black, native_american     2   0.401
## 10 middle_eastern             2   0.401
## # ℹ 14 more rows
#common combos
d$race_combos_common = d$race_combos %>% fct_lump_min(min = 9)
d$race_combos_common %>% table2()
## # A tibble: 7 × 3
##   Group           Count Percent
##   <chr>           <dbl>   <dbl>
## 1 white             359   71.9 
## 2 black              63   12.6 
## 3 Other              35    7.01
## 4 east_asian         19    3.81
## 5 hispanic           14    2.81
## 6 white, hispanic     9    1.80
## 7 <NA>                0    0
d$sex %>% table2()
## # A tibble: 3 × 3
##   Group  Count Percent
##   <chr>  <dbl>   <dbl>
## 1 Female   260    52.1
## 2 Male     239    47.9
## 3 <NA>       0     0

Remove unnecessary items

#table
d_vars = df_var_table(d)

items_of_interest <- c("silly", "avoid", "remove", "construct")
answer_counts <- d %>%
select(all_of(items_of_interest)) %>%
pivot_longer(cols = everything(), names_to = "item", values_to = "response") %>%
filter(!is.na(response)) %>%
group_by(item, response) %>%
summarise(count = n(), .groups = "drop") %>%
arrange(item, desc(count))
print(answer_counts)
## # A tibble: 17 × 3
##    item      response count
##    <chr>     <chr>    <int>
##  1 avoid     evade      493
##  2 avoid     ddd          2
##  3 avoid     ggg          2
##  4 avoid     fff          1
##  5 avoid     hhh          1
##  6 construct create     496
##  7 construct ggg          2
##  8 construct ddd          1
##  9 remove    abolish    493
## 10 remove    ddd          2
## 11 remove    fff          2
## 12 remove    hhh          2
## 13 silly     childish   491
## 14 silly     hhh          3
## 15 silly     ddd          2
## 16 silly     ggg          2
## 17 silly     fff          1
d <- d %>% filter(silly == "childish" & avoid == "evade" & remove == "abolish" & construct == "create" & evil == "wicked" & cheerful == "happy" & aroma == "odor" & farewell == "adieu") # remove people who failed any of the attention checks or the extremely easy items
nrow(d) # 441 cases
## [1] 441
easy_items = d %>% select(evil:cheat) %>% select(-c(aroma, cheerful, evil, farewell)) # skip the 4 attention checks and 4 obvious answers

IRT on easy items

# Easy items
scoring_key = read_excel("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\answer keys for 155 items.xlsx", col_names = c("word", "correct", "notes"))
items_to_remove <- c("silly", "avoid", "remove", "construct", "aroma", "cheerful", "evil", "farewell")
scoring_key <- scoring_key %>%
filter(!word %in% items_to_remove) %>%
arrange(word)
nrow(scoring_key) # 151 items
## [1] 151
#sort data colnames alphabetically
colnames_clean = easy_items %>% colnames() %>% str_clean()
order(colnames_clean)
##   [1]  71  32  80  56  53 125 137   4  34  39 132 104 116   1  65  87 110 121  79 130  90  22  97  37  41  81 148  75  44  94  27
##  [32] 108  21 147 144  96  47 129 151  59   9  43  66  51 101  42  60  54 100  38  33 123  12  92 103   7 114 143   6  95 118 112
##  [63]  61 124  19  26   3  57  14 141 150  76   8  83 122  68  16  24 105  28  35 117  78  46  30  49  86  62  23  99 142  18  72
##  [94]   5  36 128  11 140  91  10  20  52  69 134  50  77  70  48   2  40  29 107  63 149 109 145  67  88  82  84 106 146  45 131
## [125] 133 120  64 102 119 127 111 115  15  93  17  58  55  98 136  89 113  13  73 139 138  25 126 135  31  85  74
easy_items_sorted = easy_items[, order(colnames_clean)]

#check matches
vocab_matches = tibble(
scoring_key_word = scoring_key$word,
scoring_key_correct = scoring_key$correct,
data = easy_items_sorted %>% colnames()
)

#stubborn is duplicated, but fortunately in the right order
easy_items_scored = score_items(
easy_items_sorted,
scoring_key$correct
)
easy_items_table = map_df(easy_items, table2)

#count of correct
d$vocab_sumscore = rowSums(easy_items_scored)

vocab_irt = cache_object(filename = "data/vocab_irt.rds", expr = mirt(
easy_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000),
optimizer = "BFGS",
method = "EM",
guess = .20
),
renew = renew_all)
## Cache found, reading object from disk
#results
vocab_irt
## 
## Call:
## mirt(data = easy_items_scored, model = 1, itemtype = "2PL", guess = 0.2, 
##     method = "EM", optimizer = "BFGS", technical = list(NCYCLES = 5000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 301 EM iterations.
## mirt version: 1.42 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -23138
## Estimated parameters: 302 
## AIC = 46880
## BIC = 48115; SABIC = 47157
## G2 (1e+10) = 40930, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
vocab_irt %>% summary()
##                                       F1     h2
## a_specific_number                  0.460 0.2116
## a_type_of_brass_instrument         0.846 0.7153
## a_type_of_drapery                  0.663 0.4401
## a_type_of_fabric_101               0.718 0.5152
## a_type_of_fabric_98                0.744 0.5534
## a_type_of_grasshopper              0.819 0.6704
## a_type_of_hat                      0.775 0.6003
## a_type_of_heating_unit             0.517 0.2670
## a_type_of_magistrate_position      0.749 0.5613
## a_type_of_mathematical_operation   0.548 0.3001
## a_type_of_overshoe                 0.569 0.3237
## a_type_of_plant                    0.760 0.5783
## a_type_of_religious_teacher        0.765 0.5853
## a_type_of_rock                     0.327 0.1066
## a_type_of_skirts                   0.670 0.4490
## a_type_of_smokeless_powder         0.481 0.2312
## a_type_of_sound                    0.551 0.3033
## a_type_of_sword                    0.424 0.1795
## a_type_of_volcanic_crater          0.641 0.4114
## a_type_of_wavy_form                0.784 0.6147
## actionable_negligence              0.759 0.5757
## advisory                           0.645 0.4166
## advocate                           0.717 0.5142
## almighty                           0.863 0.7449
## amazement                          0.622 0.3867
## amenability                        0.755 0.5702
## auspices                           0.821 0.6733
## barely_able_to_read_and_write      0.829 0.6877
## because                            0.674 0.4540
## berate                             0.821 0.6746
## blade                              0.735 0.5404
## blend                              0.662 0.4381
## blunder                            0.802 0.6424
## bow                                0.859 0.7387
## brief                              0.528 0.2790
## bring_about                        0.782 0.6118
## carelessly_or_hastily_put_together 0.610 0.3717
## celebration                        0.727 0.5287
## cheat                              0.841 0.7066
## clay_pigeon_shooting               0.735 0.5403
## coarse                             0.760 0.5771
## collect_or_study_insects           0.624 0.3888
## colossal                           0.725 0.5262
## commotion                          0.916 0.8392
## complainer                         0.820 0.6720
## confiscate                         0.905 0.8183
## congratulate                       0.594 0.3531
## contemplate                        0.456 0.2081
## convoy                             0.889 0.7912
## couch                              0.696 0.4842
## crease                             0.755 0.5701
## cunning                            0.662 0.4378
## deceit                             0.626 0.3919
## deliberately                       0.776 0.6022
## deprive                            0.617 0.3807
## detectable                         0.324 0.1053
## diatribes                          0.683 0.4662
## disjoined                          0.422 0.1783
## disrupt                            0.747 0.5579
## distinct                           0.272 0.0741
## divergence                         0.851 0.7246
## dome                               0.576 0.3317
## downfall                           0.729 0.5316
## drink                              0.901 0.8115
## elite                              0.715 0.5105
## embarrassment                      0.588 0.3453
## emphasize                          0.820 0.6716
## empire                             0.522 0.2729
## empty                              0.735 0.5400
## entanglement                       0.785 0.6162
## environment                        0.841 0.7070
## excite                             0.793 0.6283
## flammable                          0.871 0.7590
## flatterer                          0.868 0.7532
## flatteries                         0.668 0.4459
## forever                            0.791 0.6263
## frequent                           0.140 0.0197
## gigantic                           0.710 0.5045
## girl                               0.762 0.5803
## goo                                0.668 0.4460
## goodbye                            0.804 0.6463
## greed                              0.916 0.8382
## groan                              0.814 0.6626
## gruesome                           0.667 0.4454
## guarantee                          0.282 0.0794
## gutter                             0.631 0.3982
## harmfulness                        0.493 0.2428
## hobby                              0.799 0.6383
## hut                                0.775 0.6007
## illness                            0.538 0.2896
## impromptu                          0.753 0.5677
## indescribable                      0.717 0.5139
## intellectual                       0.707 0.4998
## jargon                             0.903 0.8161
## knowledgeable                      0.829 0.6869
## lackadaisical                      0.654 0.4280
## manager                            0.849 0.7200
## meal                               0.916 0.8386
## melodic                            0.741 0.5483
## mutually                           0.836 0.6984
## nonsense                           0.847 0.7176
## nonsensical                        0.812 0.6588
## not_coveted                        0.669 0.4469
## pamper                             0.819 0.6711
## penitentiary                       0.553 0.3062
## perplexing                         0.658 0.4328
## persistence                        0.639 0.4090
## predetermine                       0.524 0.2746
## pretender                          0.798 0.6364
## questioning                        0.641 0.4113
## quickly                            0.667 0.4455
## rebellious                         0.799 0.6391
## referee                            0.739 0.5462
## referendum                         0.695 0.4834
## relating_to_the_right              0.401 0.1610
## relevant                           0.753 0.5668
## respectful                         0.760 0.5775
## retailer                           0.593 0.3519
## retract                            0.519 0.2696
## ropes                              0.802 0.6440
## sag                                0.625 0.3907
## schemer                            0.791 0.6258
## seize                              0.775 0.6006
## sensitivity                        0.737 0.5426
## shadows                            0.723 0.5234
## sketch                             0.697 0.4861
## slang                              0.812 0.6599
## slave                              0.507 0.2570
## sluggish                           0.670 0.4492
## somber                             0.702 0.4931
## spinelessness                      0.628 0.3939
## sporadic                           0.890 0.7926
## squad                              0.943 0.8887
## stagger                            0.664 0.4412
## stinking                           0.906 0.8203
## stroll                             0.900 0.8092
## stubborn_100                       0.539 0.2902
## stubborn_143                       0.639 0.4078
## stylish                            0.914 0.8358
## summit                             0.744 0.5539
## terminology                        0.878 0.7715
## the_science_of_speech_sounds       0.708 0.5020
## transportation                     0.367 0.1349
## tyrant                             0.874 0.7643
## unhealthful                        0.647 0.4180
## vile                               0.875 0.7656
## vulgar                             0.894 0.7988
## wandering                          0.522 0.2729
## warning                            0.621 0.3854
## wave                               0.727 0.5283
## weaponry                           0.681 0.4642
## 
## SS loadings:  77.8 
## Proportion Var:  0.515 
## 
## Factor correlations: 
## 
##    F1
## F1  1
vocab_irt %>% coef(simplify = T)
## $items
##                                       a1      d   g u
## a_specific_number                  0.882  1.610 0.2 1
## a_type_of_brass_instrument         2.698  4.314 0.2 1
## a_type_of_drapery                  1.509  1.782 0.2 1
## a_type_of_fabric_101               1.755  3.420 0.2 1
## a_type_of_fabric_98                1.894  1.340 0.2 1
## a_type_of_grasshopper              2.427  2.352 0.2 1
## a_type_of_hat                      2.086 -2.000 0.2 1
## a_type_of_heating_unit             1.027  1.959 0.2 1
## a_type_of_magistrate_position      1.925  2.926 0.2 1
## a_type_of_mathematical_operation   1.115  2.647 0.2 1
## a_type_of_overshoe                 1.178  0.231 0.2 1
## a_type_of_plant                    1.993  2.492 0.2 1
## a_type_of_religious_teacher        2.022  2.449 0.2 1
## a_type_of_rock                     0.588  2.807 0.2 1
## a_type_of_skirts                   1.536  1.840 0.2 1
## a_type_of_smokeless_powder         0.933  0.958 0.2 1
## a_type_of_sound                    1.123  0.511 0.2 1
## a_type_of_sword                    0.796  0.000 0.2 1
## a_type_of_volcanic_crater          1.423  2.042 0.2 1
## a_type_of_wavy_form                2.150  0.265 0.2 1
## actionable_negligence              1.982  2.264 0.2 1
## advisory                           1.438  3.705 0.2 1
## advocate                           1.751  1.251 0.2 1
## almighty                           2.908  5.449 0.2 1
## amazement                          1.352  2.775 0.2 1
## amenability                        1.960  2.132 0.2 1
## auspices                           2.443 -0.851 0.2 1
## barely_able_to_read_and_write      2.526  3.199 0.2 1
## because                            1.552  2.836 0.2 1
## berate                             2.450  2.833 0.2 1
## blade                              1.846  3.592 0.2 1
## blend                              1.503  2.804 0.2 1
## blunder                            2.281  4.692 0.2 1
## bow                                2.861  0.075 0.2 1
## brief                              1.059 -0.158 0.2 1
## bring_about                        2.137  2.713 0.2 1
## carelessly_or_hastily_put_together 1.309  2.093 0.2 1
## celebration                        1.803  1.342 0.2 1
## cheat                              2.641  1.421 0.2 1
## clay_pigeon_shooting               1.845  3.698 0.2 1
## coarse                             1.988  3.786 0.2 1
## collect_or_study_insects           1.357  3.022 0.2 1
## colossal                           1.794  1.898 0.2 1
## commotion                          3.889  3.976 0.2 1
## complainer                         2.436  2.559 0.2 1
## confiscate                         3.611  4.538 0.2 1
## congratulate                       1.257  2.274 0.2 1
## contemplate                        0.872  2.238 0.2 1
## convoy                             3.313  2.184 0.2 1
## couch                              1.649  1.258 0.2 1
## crease                             1.960  2.124 0.2 1
## cunning                            1.502  1.493 0.2 1
## deceit                             1.366  1.041 0.2 1
## deliberately                       2.094  2.018 0.2 1
## deprive                            1.334  1.802 0.2 1
## detectable                         0.584  3.263 0.2 1
## diatribes                          1.591  0.153 0.2 1
## disjoined                          0.793 -0.608 0.2 1
## disrupt                            1.912  4.194 0.2 1
## distinct                           0.482  1.599 0.2 1
## divergence                         2.761  1.184 0.2 1
## dome                               1.199  0.986 0.2 1
## downfall                           1.813  2.852 0.2 1
## drink                              3.532  2.321 0.2 1
## elite                              1.738  2.188 0.2 1
## embarrassment                      1.236  2.123 0.2 1
## emphasize                          2.434  5.042 0.2 1
## empire                             1.043  1.478 0.2 1
## empty                              1.844  2.243 0.2 1
## entanglement                       2.156 -0.359 0.2 1
## environment                        2.644 -0.933 0.2 1
## excite                             2.213  1.108 0.2 1
## flammable                          3.020  7.082 0.2 1
## flatterer                          2.973  3.064 0.2 1
## flatteries                         1.527  1.037 0.2 1
## forever                            2.203  2.708 0.2 1
## frequent                           0.241  3.562 0.2 1
## gigantic                           1.717  3.767 0.2 1
## girl                               2.001  2.695 0.2 1
## goo                                1.527  4.174 0.2 1
## goodbye                            2.301  4.245 0.2 1
## greed                              3.873  0.885 0.2 1
## groan                              2.385  3.581 0.2 1
## gruesome                           1.525  1.879 0.2 1
## guarantee                          0.500  2.440 0.2 1
## gutter                             1.384  3.210 0.2 1
## harmfulness                        0.964  1.907 0.2 1
## hobby                              2.261  2.937 0.2 1
## hut                                2.088  2.944 0.2 1
## illness                            1.087  0.755 0.2 1
## impromptu                          1.951  0.254 0.2 1
## indescribable                      1.750  3.903 0.2 1
## intellectual                       1.701  3.537 0.2 1
## jargon                             3.586  6.195 0.2 1
## knowledgeable                      2.521  2.406 0.2 1
## lackadaisical                      1.472  1.049 0.2 1
## manager                            2.729  6.548 0.2 1
## meal                               3.880  0.217 0.2 1
## melodic                            1.875  2.530 0.2 1
## mutually                           2.590  5.263 0.2 1
## nonsense                           2.713  3.275 0.2 1
## nonsensical                        2.365  5.236 0.2 1
## not_coveted                        1.530  2.076 0.2 1
## pamper                             2.431 -1.368 0.2 1
## penitentiary                       1.131  2.619 0.2 1
## perplexing                         1.487  2.696 0.2 1
## persistence                        1.416  2.108 0.2 1
## predetermine                       1.047  2.530 0.2 1
## pretender                          2.252  6.440 0.2 1
## questioning                        1.423  2.586 0.2 1
## quickly                            1.526  1.461 0.2 1
## rebellious                         2.265  2.390 0.2 1
## referee                            1.867  2.902 0.2 1
## referendum                         1.646 -1.498 0.2 1
## relating_to_the_right              0.746  0.578 0.2 1
## relevant                           1.947  0.903 0.2 1
## respectful                         1.990  2.453 0.2 1
## retailer                           1.254  1.425 0.2 1
## retract                            1.034  2.021 0.2 1
## ropes                              2.289  2.696 0.2 1
## sag                                1.363  0.870 0.2 1
## schemer                            2.201 -0.047 0.2 1
## seize                              2.087  2.639 0.2 1
## sensitivity                        1.854  1.115 0.2 1
## shadows                            1.784  0.718 0.2 1
## sketch                             1.655  0.955 0.2 1
## slang                              2.371  2.278 0.2 1
## slave                              1.001  1.139 0.2 1
## sluggish                           1.537  1.102 0.2 1
## somber                             1.679  0.479 0.2 1
## spinelessness                      1.372  1.751 0.2 1
## sporadic                           3.327  0.969 0.2 1
## squad                              4.809 11.250 0.2 1
## stagger                            1.512  1.304 0.2 1
## stinking                           3.637  3.821 0.2 1
## stroll                             3.505  1.631 0.2 1
## stubborn_100                       1.088  1.922 0.2 1
## stubborn_143                       1.412  2.221 0.2 1
## stylish                            3.840 -1.702 0.2 1
## summit                             1.897  2.434 0.2 1
## terminology                        3.127  1.778 0.2 1
## the_science_of_speech_sounds       1.709  4.094 0.2 1
## transportation                     0.672  0.331 0.2 1
## tyrant                             3.064 -3.522 0.2 1
## unhealthful                        1.443 -1.265 0.2 1
## vile                               3.076  5.142 0.2 1
## vulgar                             3.391  0.034 0.2 1
## wandering                          1.043 -0.228 0.2 1
## warning                            1.348  2.430 0.2 1
## wave                               1.801  1.924 0.2 1
## weaponry                           1.584  1.913 0.2 1
## 
## $means
## F1 
##  0 
## 
## $cov
##    F1
## F1  1
vocab_irt_scores = fscores(vocab_irt, full.scores.SE = T)
empirical_rxx(vocab_irt_scores)
##    F1 
## 0.958
plot(vocab_irt, type = "rxx")

d$g_easy = vocab_irt_scores[, 1] %>% standardize()
vocab_item_stats = itemstats(easy_items_scored)
vocab_item_stats$itemstats$g_loading = vocab_irt %>% summary() %>% .$rotF %>% .[, 1]
vocab_item_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

vocab_item_stats$itemstats$mean %>% describe2()
## # A tibble: 1 × 10
##   var       n  mean median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       151 0.806  0.848 0.141 0.124 0.333 0.993 -1.14    0.933
GG_scatter(d, "vocab_sumscore", "g_easy")
## `geom_smooth()` using formula = 'y ~ x'

GG_denhist(d, "vocab_sumscore")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d, "g_easy")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

d$vocab_sumscore_estimate = d$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct
d$vocab_ranking_estimate = d$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did
d %>% select(
vocab_sumscore,
g_easy,
vocab_sumscore_estimate,
vocab_ranking_estimate
) %>% wtd.cors()
##                         vocab_sumscore g_easy vocab_sumscore_estimate vocab_ranking_estimate
## vocab_sumscore                   1.000  0.954                   0.678                  0.497
## g_easy                           0.954  1.000                   0.703                  0.517
## vocab_sumscore_estimate          0.678  0.703                   1.000                  0.781
## vocab_ranking_estimate           0.497  0.517                   0.781                  1.000
GG_scatter(d, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'

GG_scatter(d, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'

describeBy(d$vocab_sumscore, d$I_was_a_slow_learner_in_school)
## 
##  Descriptive statistics by group 
## group: No
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 385  123 22.7    129     126 22.2  32 151   119 -0.94     0.26 1.16
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis se
## X1    1 56  112 22.4    109     112 24.5  61 149    88 0.05    -0.97  3
describeBy(d$vocab_sumscore, d$I_like_to_read_about_science)
## 
##  Descriptive statistics by group 
## group: No
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 98  112 22.4    110     113 24.5  55 150    95 -0.17    -0.74 2.27
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 343  124 22.4    131     127 19.3  32 151   119 -1.03     0.48 1.21
describeBy(d$vocab_sumscore, d$A_person_shouldn_t_be_punished_for_breaking_a_law_that_he_thinks_is_unreasonable)
## 
##  Descriptive statistics by group 
## group: No
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 392  122 22.9    128     125 23.7  32 151   119 -0.79    -0.11 1.16
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 49  117 23.2    121     119 23.7  57 148    91 -0.79     -0.1 3.31
#restandardize g
d$g_easy_ageadj = resid(ols(g_easy ~  rcs(age), data = d)) %>% unname()
## number of knots in rcs defaulting to 5
d$g_easy_ageadj_z = standardize(d$g_easy_ageadj, focal_group = d$white_only)
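# For intuition, a rough base-R equivalent of the two steps above (a sketch, not
# the method used: rms::rcs and kirkegaard::standardize differ in details such as
# knot placement, and standardize() is assumed to center/scale on the focal group):
# ageadj_alt = resid(lm(g_easy ~ splines::ns(age, df = 4), data = d)) %>% unname()
# ageadj_alt_z = (ageadj_alt - mean(ageadj_alt[d$white_only], na.rm = TRUE)) /
#   sd(ageadj_alt[d$white_only], na.rm = TRUE)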

Hard items

# Hard items
hard_items = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\follow up 20231029043500-SurveyExport.csv") %>% filter(Status == "Complete")
## New names:
## Rows: 475 Columns: 373
## ── Column specification
## ───────────────────────────────────────────────────────────────────────────────────────────────────────── Delimiter: "," chr
## (360): Status, Language, Referer, SessionID, User Agent, IP Address, Country, City, State/Region, Postal, Write in your ... dbl
## (7): Response ID, Longitude, Latitude, New Hidden Value...206, New Hidden Value...367, New Hidden Value...372, New Hid... lgl
## (4): Contact ID, Legacy Comments, Comments, Tags dttm (2): Time Started, Date Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify the column types or set `show_col_types =
## FALSE` to quiet this message.
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...23`
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...90`
## • `New Hidden Value` -> `New Hidden Value...206`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong together...279`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong together...283`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that belong together...336`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that belong together...361`
## • `New Hidden Value` -> `New Hidden Value...367`
## • `New Hidden Value` -> `New Hidden Value...372`
## • `New Hidden Value` -> `New Hidden Value...373`
hard_items_meta = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\follow up prolific_export_649a005bfc9bd0688f8e3304.csv")
## Rows: 465 Columns: 19
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (13): Submission id, Participant id, Status, Completion code, Age, Sex, Ethnicity simplified, Country of birth, Country ...
## dbl   (2): Time taken, Total approvals
## dttm  (4): Started at, Completed at, Reviewed at, Archived at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hard_items_var_list = df_var_table(hard_items)

#only completed data, only overlap
hard_items_meta %<>% filter(`Participant id` %in% hard_items$`Write in your Prolific ID:`)
hard_items %<>% filter(`Write in your Prolific ID:` %in% hard_items_meta$`Participant id`)

#subset to participants who passed all 8 check items (the 4 attention checks and the 4 very easy items)

hard_items <- semi_join( #return all rows from x with a match in y
  hard_items, 
  d, 
  by = c("Write in your Prolific ID:" = "Participant_id")
)

nrow(hard_items)
## [1] 383
#no dups
assert_that(!anyDuplicated(hard_items$`Write in your Prolific ID:`))
## [1] TRUE
assert_that(!anyDuplicated(hard_items_meta$`Participant id`))
## [1] TRUE
#subset items
hard_items_2of5 = hard_items %>% select(21:205)
hard_items_3of5 = hard_items %>% select(207:366)
hard_items_1of5 = hard_items %>% select(368:371)

#score them
#the first option(s) listed are always the correct ones
#but we can't see the option order in the csv export
hard_items_1of5_scored = score_items(hard_items_1of5,
key = c(
"whispering",
"verbiage",
"fragrance",
"sagacious"
)) %>% as_tibble()

hard_items_2of5_scored = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
  # browser()
  #subset cols
  i_cols = hard_items_2of5[, unlist(idx)]
  i_cols_NA = i_cols
  i_cols_NA[] = !is.na(i_cols_NA)
  
  #score as correct using options 1+2
  (i_cols_NA[, 1] & i_cols_NA[, 2]) %>% as.vector() %>% as.numeric()
})

hard_items_3of5_scored = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
  # browser()
  #subset cols
  i_cols = hard_items_3of5[, unlist(idx)]
  i_cols_NA = i_cols
  i_cols_NA[] = !is.na(i_cols_NA)
  
  #score as correct using options 1+2+3
  (i_cols_NA[, 1] & i_cols_NA[, 2] & i_cols_NA[, 3]) %>% as.vector() %>% as.numeric()
})
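# Toy illustration of the scoring rule above (hypothetical data, not from the
# survey): each item exports 5 option columns that are non-NA when ticked, and
# the keyed synonyms are always the first 2 (or 3) columns, so a response is
# scored 1 only when all keyed option columns are non-missing.
toy_2of5 = tibble(
  opt1 = c("synonym A", NA),          # keyed
  opt2 = c("synonym B", "synonym B"), # keyed
  opt3 = c(NA, "foil C"),
  opt4 = c(NA, NA),
  opt5 = c(NA, NA)
)
toy_2of5_scored = as.numeric(!is.na(toy_2of5$opt1) & !is.na(toy_2of5$opt2)) # 1, 0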

#combine
hard_items_scored = bind_cols(
hard_items_1of5_scored %>% set_names("1of5_" + seq_along(hard_items_1of5_scored)),
hard_items_2of5_scored %>% set_names("2of5_" + seq_along(hard_items_2of5_scored)),
hard_items_3of5_scored %>% set_names("3of5_" + seq_along(hard_items_3of5_scored))
)

#item stats
hard_items_scored_stats = itemstats(hard_items_scored)
hard_items$sumscore = rowSums(hard_items_scored)
hard_items <- hard_items %>% rename(time2 = 372) # total time for hard test

IRT on the hard items

#IRT fit
hard_items_fit = cache_object(filename = "data/hard_items_fit.rds", expr = mirt(
hard_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000)
),
renew = renew_all)
## Cache found, reading object from disk
hard_items_fit
## 
## Call:
## mirt(data = hard_items_scored, model = 1, itemtype = "2PL", technical = list(NCYCLES = 5000))
## 
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 5000 EM iterations.
## mirt version: 1.42 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -14410
## Estimated parameters: 146 
## AIC = 29111
## BIC = 29688; SABIC = 29225
## G2 (1e+10) = 24263, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
hard_items_fit %>% summary()
##              F1      h2
## 1of5_1   0.4828 0.23313
## 1of5_2   0.3170 0.10048
## 1of5_3   0.3975 0.15803
## 1of5_4   0.3409 0.11624
## 2of5_1   0.4744 0.22505
## 2of5_2   0.5564 0.30955
## 2of5_3   0.5791 0.33535
## 2of5_4   0.6299 0.39681
## 2of5_5   0.5014 0.25143
## 2of5_6   0.7330 0.53727
## 2of5_7   0.3625 0.13142
## 2of5_8   0.6099 0.37193
## 2of5_9   0.5720 0.32723
## 2of5_10  0.3786 0.14337
## 2of5_11  0.3674 0.13496
## 2of5_12  0.4459 0.19881
## 2of5_13  0.4915 0.24157
## 2of5_14  0.4411 0.19455
## 2of5_15  0.6868 0.47171
## 2of5_16  0.5960 0.35516
## 2of5_17  0.6897 0.47566
## 2of5_18  0.7421 0.55064
## 2of5_19  0.4780 0.22844
## 2of5_20  0.4095 0.16772
## 2of5_21  0.5408 0.29244
## 2of5_22  0.9027 0.81483
## 2of5_23  0.3318 0.11010
## 2of5_24  0.2422 0.05868
## 2of5_25  0.7235 0.52343
## 2of5_26  0.7245 0.52491
## 2of5_27  0.6160 0.37941
## 2of5_28  0.2964 0.08783
## 2of5_29  0.5670 0.32147
## 2of5_30  0.5707 0.32570
## 2of5_31  0.5459 0.29801
## 2of5_32  0.6786 0.46052
## 2of5_33  0.3562 0.12691
## 2of5_34  0.4856 0.23577
## 2of5_35  0.3152 0.09936
## 2of5_36  0.8505 0.72336
## 2of5_37  0.9777 0.95591
## 3of5_1   0.2567 0.06588
## 3of5_2   0.5414 0.29309
## 3of5_3   0.5198 0.27016
## 3of5_4   0.7462 0.55682
## 3of5_5   0.3335 0.11122
## 3of5_6   0.7531 0.56723
## 3of5_7   0.6111 0.37350
## 3of5_8   0.4374 0.19131
## 3of5_9   0.2678 0.07171
## 3of5_10  0.8271 0.68412
## 3of5_11  0.7382 0.54499
## 3of5_12  0.5714 0.32652
## 3of5_13  0.3597 0.12940
## 3of5_14  0.4754 0.22599
## 3of5_15 -0.0516 0.00266
## 3of5_16  0.3488 0.12169
## 3of5_17  0.4429 0.19618
## 3of5_18  0.3327 0.11070
## 3of5_19 -0.2561 0.06558
## 3of5_20  0.5689 0.32367
## 3of5_21  0.4006 0.16047
## 3of5_22  0.4663 0.21744
## 3of5_23  0.7927 0.62835
## 3of5_24  0.5444 0.29633
## 3of5_25  0.5522 0.30494
## 3of5_26  0.7385 0.54540
## 3of5_27  0.6120 0.37451
## 3of5_28  0.3782 0.14307
## 3of5_29  0.4970 0.24700
## 3of5_30  0.5716 0.32670
## 3of5_31  0.4063 0.16505
## 3of5_32  0.5819 0.33863
## 
## SS loadings:  22 
## Proportion Var:  0.301 
## 
## Factor correlations: 
## 
##    F1
## F1  1
hard_items_scored_stats$itemstats$g_loading = hard_items_fit %>% summary() %>% .$rotF %>% as.vector()
hard_items_scored_stats$itemstats
##           N  mean    sd total.r total.r_if_rm alpha_if_rm g_loading
## 1of5_1  383 0.305 0.461   0.417         0.387       0.927     0.483
## 1of5_2  383 0.303 0.460   0.274         0.241       0.928     0.317
## 1of5_3  383 0.245 0.431   0.323         0.294       0.928     0.398
## 1of5_4  383 0.431 0.496   0.319         0.285       0.928     0.341
## 2of5_1  383 0.475 0.500   0.418         0.387       0.927     0.474
## 2of5_2  383 0.269 0.444   0.451         0.424       0.927     0.556
## 2of5_3  383 0.465 0.499   0.483         0.454       0.927     0.579
## 2of5_4  383 0.266 0.443   0.502         0.477       0.927     0.630
## 2of5_5  383 0.554 0.498   0.433         0.402       0.927     0.501
## 2of5_6  383 0.713 0.453   0.534         0.509       0.927     0.733
## 2of5_7  383 0.313 0.464   0.323         0.291       0.928     0.363
## 2of5_8  383 0.245 0.431   0.488         0.463       0.927     0.610
## 2of5_9  383 0.436 0.497   0.499         0.470       0.927     0.572
## 2of5_10 383 0.423 0.495   0.339         0.305       0.928     0.379
## 2of5_11 383 0.527 0.500   0.334         0.300       0.928     0.367
## 2of5_12 383 0.232 0.423   0.354         0.326       0.928     0.446
## 2of5_13 383 0.326 0.469   0.419         0.389       0.927     0.492
## 2of5_14 383 0.136 0.343   0.298         0.275       0.928     0.441
## 2of5_15 383 0.721 0.449   0.491         0.465       0.927     0.687
## 2of5_16 383 0.493 0.501   0.506         0.477       0.927     0.596
## 2of5_17 383 0.569 0.496   0.566         0.540       0.926     0.690
## 2of5_18 383 0.402 0.491   0.625         0.601       0.926     0.742
## 2of5_19 383 0.522 0.500   0.413         0.381       0.927     0.478
## 2of5_20 383 0.559 0.497   0.363         0.330       0.928     0.410
## 2of5_21 383 0.266 0.443   0.448         0.421       0.927     0.541
## 2of5_22 383 0.567 0.496   0.723         0.705       0.925     0.903
## 2of5_23 383 0.282 0.451   0.298         0.266       0.928     0.332
## 2of5_24 383 0.238 0.426   0.199         0.168       0.929     0.242
## 2of5_25 383 0.543 0.499   0.590         0.565       0.926     0.723
## 2of5_26 383 0.467 0.500   0.618         0.594       0.926     0.725
## 2of5_27 383 0.256 0.437   0.494         0.469       0.927     0.616
## 2of5_28 383 0.554 0.498   0.273         0.238       0.928     0.296
## 2of5_29 383 0.420 0.494   0.496         0.467       0.927     0.567
## 2of5_30 383 0.580 0.494   0.478         0.448       0.927     0.571
## 2of5_31 383 0.298 0.458   0.446         0.418       0.927     0.546
## 2of5_32 383 0.117 0.322   0.424         0.404       0.927     0.679
## 2of5_33 383 0.272 0.445   0.298         0.267       0.928     0.356
## 2of5_34 383 0.509 0.501   0.431         0.400       0.927     0.486
## 2of5_35 383 0.433 0.496   0.286         0.251       0.928     0.315
## 2of5_36 383 0.990 0.102   0.140         0.132       0.928     0.851
## 2of5_37 383 0.997 0.051   0.096         0.092       0.929     0.978
## 3of5_1  383 0.407 0.492   0.257         0.222       0.928     0.257
## 3of5_2  383 0.157 0.364   0.378         0.354       0.928     0.541
## 3of5_3  383 0.185 0.389   0.382         0.357       0.928     0.520
## 3of5_4  383 0.480 0.500   0.625         0.601       0.926     0.746
## 3of5_5  383 0.394 0.489   0.311         0.278       0.928     0.333
## 3of5_6  383 0.475 0.500   0.616         0.592       0.926     0.753
## 3of5_7  383 0.230 0.421   0.472         0.447       0.927     0.611
## 3of5_8  383 0.099 0.299   0.269         0.248       0.928     0.437
## 3of5_9  383 0.264 0.441   0.231         0.199       0.928     0.268
## 3of5_10 383 0.794 0.405   0.511         0.488       0.927     0.827
## 3of5_11 383 0.311 0.463   0.605         0.583       0.926     0.738
## 3of5_12 383 0.499 0.501   0.489         0.460       0.927     0.571
## 3of5_13 383 0.180 0.385   0.263         0.236       0.928     0.360
## 3of5_14 383 0.587 0.493   0.391         0.359       0.928     0.475
## 3of5_15 383 0.285 0.452  -0.010        -0.044       0.930    -0.052
## 3of5_16 383 0.467 0.500   0.313         0.279       0.928     0.349
## 3of5_17 383 0.368 0.483   0.385         0.354       0.928     0.443
## 3of5_18 383 0.428 0.495   0.296         0.261       0.928     0.333
## 3of5_19 383 0.078 0.269  -0.083        -0.103       0.929    -0.256
## 3of5_20 383 0.339 0.474   0.487         0.459       0.927     0.569
## 3of5_21 383 0.601 0.490   0.338         0.304       0.928     0.401
## 3of5_22 383 0.540 0.499   0.394         0.362       0.928     0.466
## 3of5_23 383 0.898 0.303   0.370         0.350       0.928     0.793
## 3of5_24 383 0.637 0.481   0.441         0.411       0.927     0.544
## 3of5_25 383 0.379 0.486   0.473         0.444       0.927     0.552
## 3of5_26 383 0.373 0.484   0.622         0.599       0.926     0.739
## 3of5_27 383 0.178 0.383   0.438         0.414       0.927     0.612
## 3of5_28 383 0.815 0.389   0.247         0.219       0.928     0.378
## 3of5_29 383 0.825 0.380   0.293         0.267       0.928     0.497
## 3of5_30 383 0.433 0.496   0.490         0.461       0.927     0.572
## 3of5_31 383 0.702 0.458   0.316         0.285       0.928     0.406
## 3of5_32 383 0.705 0.457   0.432         0.403       0.927     0.582
hard_items_scored_stats$itemstats %>% describe2()
## # A tibble: 7 × 10
##   var               n    mean  median       sd      mad      min     max      skew kurtosis
##   <chr>         <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
## 1 N                73 383     383     0        0        383      383     NaN        NaN    
## 2 mean             73   0.436   0.428 0.206    0.205      0.0783   0.997   0.651      0.114
## 3 sd               73   0.445   0.469 0.0826   0.0411     0.0511   0.501  -2.74       8.87 
## 4 total.r          73   0.397   0.417 0.143    0.127     -0.0834   0.723  -0.604      1.15 
## 5 total.r_if_rm    73   0.369   0.387 0.144    0.134     -0.103    0.705  -0.520      1.03 
## 6 alpha_if_rm      73   0.927   0.927 0.000813 0.000801   0.925    0.930  -0.00918    0.342
## 7 g_loading        73   0.512   0.520 0.199    0.177     -0.256    0.978  -0.684      2.27
#difficulties
hard_items_scored_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

#loadings
hard_items_scored_stats$itemstats$g_loading %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#reliability
plot(hard_items_fit, type = "rxx")

hard_items_fit_scores = fscores(hard_items_fit, full.scores.SE = T)
empirical_rxx(hard_items_fit_scores)
##    F1 
## 0.938
#time spent vs. score
# hard_items_scored$time_spent_mins = (hard_items$`Date Submitted`-hard_items$`Time Started`)/60
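# A sketch of how the commented-out timing variable could be computed with explicit
# units (difftime avoids depending on the default unit of the "-" operator):
# hard_items$time_spent_mins = as.numeric(difftime(hard_items$`Date Submitted`,
#   hard_items$`Time Started`, units = "mins"))
# cor(hard_items$time_spent_mins, hard_items$sumscore, use = "pairwise.complete.obs")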

#exact choices table
hard_items_2of5_choices = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_2of5[, unlist(idx)]

#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick2of5_" + 1:ncol(hard_items_2of5_scored))

hard_items_3of5_choices = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_3of5[, unlist(idx)]

#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick3of5_" + 1:ncol(hard_items_3of5_scored))
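
To make the intended collapsing step concrete, here is a minimal base-R sketch on toy data (hypothetical values, not from the survey): the choice columns are taken in blocks of five and the non-missing picks in each block are pasted together per respondent.

#toy data: 2 respondents x 10 choice columns (two 5-option items); NA = option not picked
toy = data.frame(matrix(c("dog", NA, "cat", NA, NA,    NA, "red", NA, NA, "blue",
                          NA, "fish", NA, NA, "bird",  "green", NA, NA, NA, NA),
                        nrow = 2, byrow = TRUE))
blocks = split(seq_len(ncol(toy)), ceiling(seq_len(ncol(toy)) / 5))
sapply(blocks, function(idx) {
  apply(toy[, idx, drop = FALSE], 1, function(x) str_c(na.omit(x), collapse = ", "))
})
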

Full test descriptive stats

# Full test

#merge data
all_items_scored = left_join(
easy_items_scored %>% mutate(id = d$Participant_id),
hard_items_scored %>% mutate(id = hard_items$`Write in your Prolific ID:`),
by = "id"
)
#no dups
assert_that(!anyDuplicated(all_items_scored$id))
## [1] TRUE
d1 = left_join(
all_items_scored,
hard_items %>% mutate(id = hard_items$`Write in your Prolific ID:`),
by = "id"
)

d$time <- d$New_Hidden_Value_197 # time since the survey taker started the current page
dk <- d1 %>% left_join(d %>% select(Participant_id, age, time), by = c("id" = "Participant_id"))

start_col <- which(names(dk) == "a_specific_number")
end_col <- which(names(dk) == "weaponry")
column_names <- names(dk)[start_col:end_col]
dk <- dk %>% mutate(easy_test = rowSums(select(., all_of(column_names)), na.rm = TRUE))

start_col <- which(names(dk) == "1of5_1")
end_col <- which(names(dk) == "3of5_32")
column_names <- names(dk)[start_col:end_col]
dk <- dk %>% mutate(hard_test = rowSums(select(., all_of(column_names)), na.rm = TRUE))
#flag whether the participant took the hard test (rowSums with na.rm = TRUE scores skippers as 0)
dk <- dk %>% mutate(hard_missing = case_when(hard_test > 0 ~ 1, hard_test == 0 ~ 0))
print(describeBy(dk$easy_test, dk$hard_missing))
## 
##  Descriptive statistics by group 
## group: 0
##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 58  112 24.5    114     113 31.1  64 150    86 -0.16    -1.25 3.22
## ------------------------------------------------------------------------------------------------ 
## group: 1
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 383  123 22.4    129     126 22.2  32 151   119 -0.9     0.27 1.15
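
Using the group statistics above, the easy-test gap between those who skipped the hard test and those who took it comes to roughly half a standard deviation:

#rough Cohen's d from the printed group means/SDs (pooled SD weighted by group size)
m = c(skipped = 112, took = 123); s = c(24.5, 22.4); n = c(58, 383)
sd_pooled = sqrt(sum((n - 1) * s^2) / (sum(n) - 2))
unname((m["took"] - m["skipped"]) / sd_pooled)  #about 0.48
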
na_count <- sum(is.na(dk$sumscore))
print(na_count)
## [1] 58
print(describe(d$Total_approvals))
##    vars   n mean   sd median trimmed  mad min  max range skew kurtosis   se
## X1    1 441 1494 1278   1177    1321 1173   4 6633  6629 1.18     1.12 60.8
cor(dk$age, dk$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.347
dk0 <- dk %>% filter(hard_test > 0)
nrow(dk0)
## [1] 383
ggplot(dk0, aes(x = easy_test, y = hard_test)) +
geom_point() +
stat_cor(method = "pearson") + # displays correlation in the plot
labs(title = "Scatter Plot of Easy Test vs Hard Test",
x = "Easy Test",
y = "Hard Test")

dk0$age <- as.numeric(dk0$age) 

ggplot(dk0, aes(x = age, y = hard_test)) +
geom_point() +
stat_cor(method = "pearson") + # displays the correlation in the plot (requires numeric age, hence the conversion above)
labs(title = "Hard test versus Age",
x = "age",
y = "Hard Test")

cor(dk0$easy_test, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.814
cor(dk0$age, dk0$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.334
cor(dk0$age, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.325
cor(dk0$time, dk0$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] -0.143
cor(dk0$time2, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] -0.0248
dk0 <- dk0 %>% mutate(aged = case_when(
  age >= 70 ~ 1,
  age <= 69 ~ 0))

print(describeBy(dk0$time, dk0$aged))
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean  sd median trimmed mad min  max range skew kurtosis   se
## X1    1 358 1437 745   1222    1316 480 501 5024  4523 1.92     4.57 39.4
## ------------------------------------------------------------------------------------------------ 
## group: 1
##    vars  n mean  sd median trimmed mad min  max range skew kurtosis se
## X1    1 25 1199 375   1114    1153 249 779 2363  1584 1.31     1.52 75
print(describeBy(dk0$time2, dk0$aged))
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean   sd median trimmed mad min   max range skew kurtosis  se
## X1    1 358 1406 2204   1117    1165 481 363 35564 35201 12.6      178 116
## ------------------------------------------------------------------------------------------------ 
## group: 1
##    vars  n mean  sd median trimmed mad min  max range skew kurtosis   se
## X1    1 25 1270 421   1229    1237 535 670 2220  1550 0.59    -0.55 84.2
print(describeBy(dk0$easy_test, dk0$aged))
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 358  122 22.5    128     125 24.5  32 151   119 -0.85     0.18 1.19
## ------------------------------------------------------------------------------------------------ 
## group: 1
##    vars  n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 25  134 18.4    140     137 8.9  68 151    83 -1.93        4 3.69
print(describeBy(dk0$hard_test, dk0$aged))
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis  se
## X1    1 358 31.3 13.2     29    30.7 14.8   7  66    59 0.37    -0.83 0.7
## ------------------------------------------------------------------------------------------------ 
## group: 1
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 25 39.6 13.4     41    39.8 13.3  15  65    50 -0.2    -0.97 2.69

ggplot(d, aes(x = time, y = age)) +
geom_point() +
stat_cor(method = "pearson") + # displays the correlation in the plot (only shown if age is numeric)
labs(title = "Test Time versus Age",
x = "Test Time",
y = "age")

#fit the full dataset to the same model
#slow to converge at default settings (an earlier attempt had not converged even after 20k iterations)
all_items_fit = cache_object(filename = "data/all_items_fit.rds", expr = mirt(
all_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
#item stats
all_items_stats = itemstats(all_items_scored %>% select(-id))
all_items_fit
## 
## Call:
## mirt(data = all_items_scored %>% select(-id), model = 1, itemtype = "2PL", 
##     technical = list(NCYCLES = 2000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 322 EM iterations.
## mirt version: 1.42 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -37382
## Estimated parameters: 448 
## AIC = 75660
## BIC = 77492; SABIC = 76070
all_items_fit %>% summary()
##                                         F1      h2
## a_specific_number                   0.4560 0.20789
## a_type_of_brass_instrument          0.8771 0.76923
## a_type_of_drapery                   0.6508 0.42360
## a_type_of_fabric_101                0.7424 0.55123
## a_type_of_fabric_98                 0.6988 0.48836
## a_type_of_grasshopper               0.8023 0.64370
## a_type_of_hat                       0.5737 0.32917
## a_type_of_heating_unit              0.4979 0.24789
## a_type_of_magistrate_position       0.7441 0.55364
## a_type_of_mathematical_operation    0.5695 0.32429
## a_type_of_overshoe                  0.4956 0.24566
## a_type_of_plant                     0.7337 0.53827
## a_type_of_religious_teacher         0.7790 0.60687
## a_type_of_rock                      0.3189 0.10172
## a_type_of_skirts                    0.6417 0.41179
## a_type_of_smokeless_powder          0.4664 0.21751
## a_type_of_sound                     0.4894 0.23953
## a_type_of_sword                     0.3558 0.12657
## a_type_of_volcanic_crater           0.6664 0.44412
## a_type_of_wavy_form                 0.7062 0.49872
## actionable_negligence               0.7682 0.59008
## advisory                            0.7231 0.52283
## advocate                            0.7009 0.49129
## almighty                            0.9138 0.83505
## amazement                           0.6603 0.43605
## amenability                         0.7384 0.54527
## auspices                            0.6870 0.47199
## barely_able_to_read_and_write       0.8397 0.70510
## because                             0.6806 0.46319
## berate                              0.8359 0.69873
## blade                               0.7819 0.61141
## blend                               0.6814 0.46427
## blunder                             0.8589 0.73774
## bow                                 0.7508 0.56370
## brief                               0.4324 0.18699
## bring_about                         0.7888 0.62228
## carelessly_or_hastily_put_together  0.6081 0.36974
## celebration                         0.7185 0.51622
## cheat                               0.8051 0.64823
## clay_pigeon_shooting                0.7660 0.58677
## coarse                              0.7755 0.60136
## collect_or_study_insects            0.6253 0.39096
## colossal                            0.7116 0.50634
## commotion                           0.9020 0.81358
## complainer                          0.7944 0.63115
## confiscate                          0.9086 0.82559
## congratulate                        0.5907 0.34891
## contemplate                         0.4685 0.21948
## convoy                              0.8540 0.72929
## couch                               0.6500 0.42252
## crease                              0.7622 0.58099
## cunning                             0.6379 0.40695
## deceit                              0.5741 0.32964
## deliberately                        0.7601 0.57774
## deprive                             0.6354 0.40377
## detectable                          0.3379 0.11416
## diatribes                           0.5787 0.33491
## disjoined                           0.3118 0.09721
## disrupt                             0.7905 0.62488
## distinct                            0.2755 0.07590
## divergence                          0.7954 0.63259
## dome                                0.5515 0.30420
## downfall                            0.7742 0.59934
## drink                               0.8507 0.72361
## elite                               0.6987 0.48813
## embarrassment                       0.6018 0.36217
## emphasize                           0.8795 0.77355
## empire                              0.5053 0.25538
## empty                               0.7586 0.57543
## entanglement                        0.6907 0.47709
## environment                         0.6582 0.43321
## excite                              0.7748 0.60036
## flammable                           0.9196 0.84567
## flatterer                           0.8670 0.75177
## flatteries                          0.6458 0.41708
## forever                             0.7978 0.63656
## frequent                            0.0918 0.00843
## gigantic                            0.7624 0.58127
## girl                                0.7792 0.60716
## goo                                 0.7159 0.51247
## goodbye                             0.8195 0.67163
## greed                               0.8404 0.70630
## groan                               0.8507 0.72368
## gruesome                            0.6646 0.44166
## guarantee                           0.2832 0.08018
## gutter                              0.6566 0.43107
## harmfulness                         0.5228 0.27333
## hobby                               0.8052 0.64835
## hut                                 0.8105 0.65694
## illness                             0.4915 0.24155
## impromptu                           0.6602 0.43582
## indescribable                       0.7615 0.57986
## intellectual                        0.7103 0.50459
## jargon                              0.9257 0.85691
## knowledgeable                       0.8164 0.66652
## lackadaisical                       0.6056 0.36677
## manager                             0.8667 0.75116
## meal                                0.7912 0.62601
## melodic                             0.7282 0.53030
## mutually                            0.8726 0.76141
## nonsense                            0.8601 0.73980
## nonsensical                         0.8710 0.75860
## not_coveted                         0.6638 0.44058
## pamper                              0.5901 0.34827
## penitentiary                        0.5260 0.27671
## perplexing                          0.6897 0.47566
## persistence                         0.6367 0.40544
## predetermine                        0.5420 0.29378
## pretender                           0.8563 0.73318
## questioning                         0.6609 0.43685
## quickly                             0.6566 0.43113
## rebellious                          0.8024 0.64392
## referee                             0.7714 0.59502
## referendum                          0.5253 0.27596
## relating_to_the_right               0.3501 0.12259
## relevant                            0.6983 0.48765
## respectful                          0.7700 0.59296
## retailer                            0.6003 0.36041
## retract                             0.5233 0.27385
## ropes                               0.7983 0.63724
## sag                                 0.5958 0.35496
## schemer                             0.6689 0.44738
## seize                               0.7927 0.62842
## sensitivity                         0.7169 0.51397
## shadows                             0.6587 0.43386
## sketch                              0.6537 0.42734
## slang                               0.8019 0.64312
## slave                               0.4811 0.23146
## sluggish                            0.6341 0.40204
## somber                              0.6054 0.36651
## spinelessness                       0.6318 0.39922
## sporadic                            0.8308 0.69028
## squad                               0.9747 0.95006
## stagger                             0.6521 0.42519
## stinking                            0.8760 0.76741
## stroll                              0.8714 0.75940
## stubborn_100                        0.5511 0.30372
## stubborn_143                        0.6541 0.42788
## stylish                             0.6914 0.47802
## summit                              0.7555 0.57083
## terminology                         0.8288 0.68694
## the_science_of_speech_sounds        0.7507 0.56350
## transportation                      0.2980 0.08879
## tyrant                              0.4395 0.19317
## unhealthful                         0.4189 0.17545
## vile                                0.8810 0.77609
## vulgar                              0.7191 0.51711
## wandering                           0.4151 0.17229
## warning                             0.6374 0.40624
## wave                                0.7200 0.51835
## weaponry                            0.6755 0.45632
## 1of5_1                              0.4402 0.19375
## 1of5_2                              0.2892 0.08363
## 1of5_3                              0.3770 0.14216
## 1of5_4                              0.3109 0.09664
## 2of5_1                              0.4115 0.16937
## 2of5_2                              0.5367 0.28809
## 2of5_3                              0.5814 0.33800
## 2of5_4                              0.6195 0.38372
## 2of5_5                              0.5203 0.27073
## 2of5_6                              0.7351 0.54043
## 2of5_7                              0.3749 0.14052
## 2of5_8                              0.5864 0.34383
## 2of5_9                              0.5549 0.30788
## 2of5_10                             0.3782 0.14305
## 2of5_11                             0.3707 0.13745
## 2of5_12                             0.4195 0.17600
## 2of5_13                             0.4912 0.24124
## 2of5_14                             0.4024 0.16195
## 2of5_15                             0.6763 0.45745
## 2of5_16                             0.5748 0.33038
## 2of5_17                             0.6795 0.46169
## 2of5_18                             0.7350 0.54027
## 2of5_19                             0.4470 0.19980
## 2of5_20                             0.3849 0.14816
## 2of5_21                             0.5165 0.26682
## 2of5_22                             0.8802 0.77482
## 2of5_23                             0.3123 0.09753
## 2of5_24                             0.2116 0.04476
## 2of5_25                             0.7345 0.53949
## 2of5_26                             0.7117 0.50650
## 2of5_27                             0.5825 0.33932
## 2of5_28                             0.2751 0.07568
## 2of5_29                             0.5487 0.30107
## 2of5_30                             0.5515 0.30421
## 2of5_31                             0.5391 0.29058
## 2of5_32                             0.6284 0.39494
## 2of5_33                             0.3334 0.11117
## 2of5_34                             0.4533 0.20551
## 2of5_35                             0.2949 0.08697
## 2of5_36                             0.7625 0.58146
## 2of5_37                             0.8562 0.73303
## 3of5_1                              0.2726 0.07430
## 3of5_2                              0.4951 0.24512
## 3of5_3                              0.4760 0.22656
## 3of5_4                              0.7072 0.50017
## 3of5_5                              0.3302 0.10900
## 3of5_6                              0.7498 0.56227
## 3of5_7                              0.5949 0.35395
## 3of5_8                              0.4098 0.16792
## 3of5_9                              0.2478 0.06142
## 3of5_10                             0.8494 0.72152
## 3of5_11                             0.7229 0.52262
## 3of5_12                             0.5459 0.29798
## 3of5_13                             0.3646 0.13292
## 3of5_14                             0.5077 0.25774
## 3of5_15                            -0.0448 0.00201
## 3of5_16                             0.3256 0.10603
## 3of5_17                             0.3855 0.14863
## 3of5_18                             0.3197 0.10218
## 3of5_19                            -0.2267 0.05139
## 3of5_20                             0.5537 0.30654
## 3of5_21                             0.4213 0.17750
## 3of5_22                             0.4441 0.19724
## 3of5_23                             0.8596 0.73893
## 3of5_24                             0.5497 0.30220
## 3of5_25                             0.5256 0.27624
## 3of5_26                             0.7102 0.50445
## 3of5_27                             0.5969 0.35632
## 3of5_28                             0.4133 0.17084
## 3of5_29                             0.5406 0.29221
## 3of5_30                             0.5113 0.26144
## 3of5_31                             0.4263 0.18169
## 3of5_32                             0.5758 0.33151
## 
## SS loadings:  94.4 
## Proportion Var:  0.421 
## 
## Factor correlations: 
## 
##    F1
## F1  1
all_items_stats$itemstats$g_loading = all_items_fit %>% summary(verbose = F) %>% .$rotF %>% as.vector()
all_items_stats$itemstats$discrim = coef(all_items_fit, simplify = T)$items[, 1] %>% unname()
all_items_stats$itemstats$difficulty = -coef(all_items_fit, simplify = T)$items[, 2] %>% unname()
all_items_stats$itemstats
##                                      N  mean    sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## a_specific_number                  441 0.841 0.366   0.259         0.249       0.974     0.456   0.872     -1.900
## a_type_of_brass_instrument         441 0.932 0.252   0.386         0.380       0.974     0.877   3.107     -4.937
## a_type_of_drapery                  441 0.823 0.382   0.389         0.380       0.974     0.651   1.459     -2.092
## a_type_of_fabric_101               441 0.934 0.248   0.320         0.314       0.974     0.742   1.886     -3.734
## a_type_of_fabric_98                441 0.755 0.431   0.480         0.470       0.974     0.699   1.663     -1.702
## a_type_of_grasshopper              441 0.821 0.384   0.511         0.503       0.974     0.802   2.288     -2.674
## a_type_of_hat                      441 0.356 0.479   0.467         0.456       0.974     0.574   1.192      0.725
## a_type_of_heating_unit             441 0.871 0.336   0.248         0.240       0.974     0.498   0.977     -2.213
## a_type_of_magistrate_position      441 0.898 0.303   0.356         0.349       0.974     0.744   1.896     -3.187
## a_type_of_mathematical_operation   441 0.921 0.271   0.262         0.255       0.974     0.569   1.179     -2.926
## a_type_of_overshoe                 441 0.635 0.482   0.396         0.384       0.974     0.496   0.971     -0.689
## a_type_of_plant                    441 0.862 0.346   0.422         0.414       0.974     0.734   1.838     -2.721
## a_type_of_religious_teacher        441 0.855 0.353   0.463         0.455       0.974     0.779   2.115     -2.874
## a_type_of_rock                     441 0.948 0.223   0.099         0.094       0.974     0.319   0.573     -3.034
## a_type_of_skirts                   441 0.828 0.378   0.386         0.377       0.974     0.642   1.424     -2.107
## a_type_of_smokeless_powder         441 0.753 0.432   0.299         0.288       0.974     0.466   0.897     -1.305
## a_type_of_sound                    441 0.680 0.467   0.363         0.351       0.974     0.489   0.955     -0.919
## a_type_of_sword                    441 0.599 0.491   0.259         0.245       0.974     0.356   0.648     -0.448
## a_type_of_volcanic_crater          441 0.855 0.353   0.379         0.371       0.974     0.666   1.521     -2.415
## a_type_of_wavy_form                441 0.615 0.487   0.551         0.541       0.974     0.706   1.698     -0.803
## actionable_negligence              441 0.841 0.366   0.446         0.438       0.974     0.768   2.042     -2.677
## advisory                           441 0.959 0.198   0.243         0.238       0.974     0.723   1.782     -4.191
## advocate                           441 0.751 0.433   0.500         0.491       0.974     0.701   1.673     -1.676
## almighty                           441 0.961 0.193   0.350         0.346       0.974     0.914   3.830     -6.556
## amazement                          441 0.918 0.274   0.305         0.298       0.974     0.660   1.497     -3.134
## amenability                        441 0.832 0.374   0.420         0.412       0.974     0.738   1.864     -2.454
## auspices                           441 0.494 0.501   0.559         0.548       0.974     0.687   1.609     -0.059
## barely_able_to_read_and_write      441 0.880 0.326   0.471         0.464       0.974     0.840   2.632     -3.619
## because                            441 0.912 0.284   0.284         0.277       0.974     0.681   1.581     -3.105
## berate                             441 0.857 0.350   0.450         0.442       0.974     0.836   2.592     -3.308
## blade                              441 0.939 0.240   0.342         0.336       0.974     0.782   2.135     -4.059
## blend                              441 0.912 0.284   0.327         0.320       0.974     0.681   1.584     -3.108
## blunder                            441 0.961 0.193   0.229         0.225       0.974     0.859   2.855     -5.396
## bow                                441 0.596 0.491   0.596         0.586       0.974     0.751   1.935     -0.768
## brief                              441 0.571 0.495   0.431         0.419       0.974     0.432   0.816     -0.349
## bring_about                        441 0.868 0.338   0.438         0.430       0.974     0.789   2.185     -3.081
## carelessly_or_hastily_put_together 441 0.866 0.341   0.312         0.304       0.974     0.608   1.304     -2.373
## celebration                        441 0.757 0.429   0.478         0.468       0.974     0.718   1.758     -1.775
## cheat                              441 0.723 0.448   0.574         0.566       0.974     0.805   2.310     -1.855
## clay_pigeon_shooting               441 0.943 0.232   0.293         0.287       0.974     0.766   2.028     -4.048
## coarse                             441 0.941 0.236   0.340         0.334       0.974     0.775   2.090     -4.062
## collect_or_study_insects           441 0.932 0.252   0.273         0.266       0.974     0.625   1.364     -3.243
## colossal                           441 0.823 0.382   0.417         0.408       0.974     0.712   1.724     -2.272
## commotion                          441 0.853 0.355   0.562         0.555       0.974     0.902   3.556     -4.138
## complainer                         441 0.839 0.368   0.482         0.474       0.974     0.794   2.226     -2.802
## confiscate                         441 0.896 0.306   0.484         0.477       0.974     0.909   3.703     -4.915
## congratulate                       441 0.887 0.317   0.294         0.285       0.974     0.591   1.246     -2.544
## contemplate                        441 0.902 0.297   0.242         0.234       0.974     0.468   0.903     -2.509
## convoy                             441 0.764 0.425   0.592         0.584       0.974     0.854   2.794     -2.529
## couch                              441 0.757 0.429   0.434         0.424       0.974     0.650   1.456     -1.601
## crease                             441 0.830 0.376   0.471         0.463       0.974     0.762   2.004     -2.540
## cunning                            441 0.796 0.403   0.369         0.359       0.974     0.638   1.410     -1.849
## deceit                             441 0.746 0.436   0.390         0.379       0.974     0.574   1.194     -1.392
## deliberately                       441 0.812 0.391   0.479         0.471       0.974     0.760   1.991     -2.366
## deprive                            441 0.837 0.370   0.384         0.375       0.974     0.635   1.401     -2.168
## detectable                         441 0.966 0.181   0.103         0.097       0.974     0.338   0.611     -3.503
## diatribes                          441 0.624 0.485   0.454         0.443       0.974     0.579   1.208     -0.696
## disjoined                          441 0.494 0.501   0.272         0.259       0.974     0.312   0.558      0.017
## disrupt                            441 0.959 0.198   0.275         0.270       0.974     0.790   2.197     -4.606
## distinct                           441 0.857 0.350   0.186         0.176       0.974     0.275   0.488     -1.872
## divergence                         441 0.698 0.459   0.590         0.582       0.974     0.795   2.233     -1.618
## dome                               441 0.744 0.437   0.394         0.383       0.974     0.552   1.125     -1.347
## downfall                           441 0.898 0.303   0.407         0.400       0.974     0.774   2.082     -3.351
## drink                              441 0.769 0.422   0.558         0.549       0.974     0.851   2.754     -2.542
## elite                              441 0.853 0.355   0.385         0.376       0.974     0.699   1.662     -2.496
## embarrassment                      441 0.875 0.331   0.314         0.305       0.974     0.602   1.283     -2.449
## emphasize                          441 0.966 0.181   0.285         0.280       0.974     0.880   3.146     -5.890
## empire                             441 0.816 0.388   0.314         0.305       0.974     0.505   0.997     -1.770
## empty                              441 0.848 0.359   0.448         0.440       0.974     0.759   1.981     -2.697
## entanglement                       441 0.542 0.499   0.538         0.527       0.974     0.691   1.626     -0.345
## environment                        441 0.503 0.501   0.541         0.531       0.974     0.658   1.488     -0.098
## excite                             441 0.707 0.455   0.581         0.572       0.974     0.775   2.086     -1.603
## flammable                          441 0.986 0.116   0.224         0.221       0.974     0.920   3.984     -8.083
## flatterer                          441 0.844 0.364   0.559         0.551       0.974     0.867   2.962     -3.480
## flatteries                         441 0.732 0.443   0.481         0.471       0.974     0.646   1.440     -1.426
## forever                            441 0.864 0.343   0.455         0.447       0.974     0.798   2.253     -3.088
## frequent                           441 0.977 0.149   0.001        -0.003       0.974     0.092   0.157     -3.775
## gigantic                           441 0.952 0.213   0.243         0.237       0.974     0.762   2.005     -4.232
## girl                               441 0.875 0.331   0.418         0.410       0.974     0.779   2.116     -3.100
## goo                                441 0.971 0.169   0.221         0.216       0.974     0.716   1.745     -4.515
## goodbye                            441 0.948 0.223   0.326         0.321       0.974     0.820   2.434     -4.562
## greed                              441 0.649 0.478   0.668         0.660       0.974     0.840   2.639     -1.441
## groan                              441 0.912 0.284   0.391         0.384       0.974     0.851   2.754     -4.195
## gruesome                           441 0.834 0.372   0.391         0.382       0.974     0.665   1.514     -2.224
## guarantee                          441 0.930 0.256   0.105         0.098       0.974     0.283   0.503     -2.683
## gutter                             441 0.941 0.236   0.225         0.219       0.974     0.657   1.481     -3.503
## harmfulness                        441 0.868 0.338   0.313         0.304       0.974     0.523   1.044     -2.230
## hobby                              441 0.880 0.326   0.443         0.436       0.974     0.805   2.311     -3.325
## hut                                441 0.889 0.315   0.454         0.447       0.974     0.811   2.355     -3.480
## illness                            441 0.717 0.451   0.333         0.322       0.974     0.491   0.961     -1.119
## impromptu                          441 0.626 0.484   0.514         0.503       0.974     0.660   1.496     -0.801
## indescribable                      441 0.955 0.208   0.292         0.287       0.974     0.761   2.000     -4.283
## intellectual                       441 0.943 0.232   0.294         0.288       0.974     0.710   1.718     -3.755
## jargon                             441 0.959 0.198   0.345         0.340       0.974     0.926   4.165     -6.891
## knowledgeable                      441 0.823 0.382   0.512         0.503       0.974     0.816   2.406     -2.790
## lackadaisical                      441 0.741 0.438   0.416         0.406       0.974     0.606   1.295     -1.412
## manager                            441 0.986 0.116   0.191         0.188       0.974     0.867   2.957     -6.731
## meal                               441 0.610 0.488   0.625         0.616       0.974     0.791   2.202     -0.957
## melodic                            441 0.873 0.333   0.422         0.414       0.974     0.728   1.808     -2.818
## mutually                           441 0.966 0.181   0.309         0.304       0.974     0.873   3.040     -5.765
## nonsense                           441 0.873 0.333   0.494         0.486       0.974     0.860   2.870     -3.755
## nonsensical                        441 0.973 0.163   0.305         0.300       0.974     0.871   3.017     -6.007
## not_coveted                        441 0.853 0.355   0.370         0.361       0.974     0.664   1.510     -2.386
## pamper                             441 0.467 0.499   0.453         0.442       0.974     0.590   1.244      0.121
## penitentiary                       441 0.918 0.274   0.240         0.232       0.974     0.526   1.053     -2.807
## perplexing                         441 0.905 0.294   0.375         0.368       0.974     0.690   1.621     -3.045
## persistence                        441 0.862 0.346   0.350         0.341       0.974     0.637   1.405     -2.398
## predetermine                       441 0.916 0.278   0.255         0.248       0.974     0.542   1.098     -2.805
## pretender                          441 0.991 0.095   0.140         0.137       0.974     0.856   2.821     -7.004
## questioning                        441 0.900 0.300   0.343         0.335       0.974     0.661   1.499     -2.889
## quickly                            441 0.789 0.408   0.402         0.392       0.974     0.657   1.482     -1.841
##  [ reached 'max' / getOption("max.print") -- omitted 113 rows ]
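
A note on the difficulty column above: mirt estimates the 2PL in slope/intercept form, P(correct | theta) = plogis(a*theta + d), so the value computed above is the intercept with its sign flipped, while the classical 2PL difficulty on the theta scale is b = -d/a. A minimal sketch of the conversion (same coef() layout as above, added here for illustration only):

#convert slope/intercept parameters to classical discrimination/difficulty
pars_si = coef(all_items_fit, simplify = T)$items
b_classical = -pars_si[, 2] / pars_si[, 1]   #b = -d/a
head(tibble(item = rownames(pars_si), neg_intercept = -pars_si[, 2], b = b_classical))
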
all_items_stats$itemstats %>%
describe2()
## # A tibble: 9 × 10
##   var               n    mean  median         sd       mad      min     max   skew kurtosis
##   <chr>         <dbl>   <dbl>   <dbl>      <dbl>     <dbl>    <dbl>   <dbl>  <dbl>    <dbl>
## 1 N               224 422.    441     27.2       0         383      441     -0.738  -1.46  
## 2 mean            224   0.685   0.754  0.239     0.234       0.0783   0.997 -0.695  -0.676 
## 3 sd              224   0.385   0.407  0.105     0.110       0.0511   0.501 -0.890   0.0891
## 4 total.r         224   0.383   0.391  0.130     0.128      -0.123    0.725 -0.383   0.811 
## 5 total.r_if_rm   224   0.374   0.383  0.129     0.126      -0.130    0.718 -0.366   0.810 
## 6 alpha_if_rm     224   0.974   0.974  0.0000919 0.0000923   0.974    0.975 -0.165   0.529 
## 7 g_loading       224   0.621   0.654  0.190     0.190      -0.227    0.975 -0.845   1.19  
## 8 discrim         224   1.59    1.47   0.877     0.761      -0.396    7.42   1.74    7.88  
## 9 difficulty      224  -1.70   -1.68   2.23      2.11      -14.2      2.74  -1.04    3.68
# descriptive stats for wave 1 test
all_items_stats$itemstats %>%
filter(N == 441) %>%
describe2()
## # A tibble: 9 × 10
##   var               n    mean  median        sd       mad       min     max    skew kurtosis
##   <chr>         <dbl>   <dbl>   <dbl>     <dbl>     <dbl>     <dbl>   <dbl>   <dbl>    <dbl>
## 1 N               151 441     441     0         0         441       441     NaN     NaN     
## 2 mean            151   0.806   0.848 0.141     0.124       0.333     0.993  -1.14    0.933 
## 3 sd              151   0.356   0.359 0.102     0.113       0.0823    0.501  -0.502  -0.502 
## 4 total.r         151   0.391   0.396 0.122     0.124       0.00126   0.669  -0.228   0.0736
## 5 total.r_if_rm   151   0.383   0.384 0.121     0.124      -0.00293   0.661  -0.209   0.0696
## 6 alpha_if_rm     151   0.974   0.974 0.0000823 0.0000877   0.974     0.974  -0.443  -0.176 
## 7 g_loading       151   0.681   0.699 0.157     0.142       0.0918    0.975  -0.899   0.823 
## 8 discrim         151   1.82    1.66  0.884     0.675       0.157     7.42    1.99    9.42  
## 9 difficulty      151  -2.62   -2.50  1.85      1.33      -14.2       0.783  -1.95    9.19
# descriptive stats for wave 2 test
all_items_stats$itemstats %>%
filter(N == 383) %>%
describe2()
## # A tibble: 9 × 10
##   var               n    mean  median       sd      mad      min     max     skew kurtosis
##   <chr>         <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>
## 1 N                73 383     383     0        0        383      383     NaN       NaN    
## 2 mean             73   0.436   0.428 0.206    0.205      0.0783   0.997   0.651     0.114
## 3 sd               73   0.445   0.469 0.0826   0.0411     0.0511   0.501  -2.74      8.87 
## 4 total.r          73   0.367   0.379 0.145    0.121     -0.123    0.725  -0.470     1.14 
## 5 total.r_if_rm    73   0.356   0.368 0.145    0.126     -0.130    0.718  -0.437     1.09 
## 6 alpha_if_rm      73   0.974   0.974 0.000108 0.000103   0.974    0.975  -0.0804    0.466
## 7 g_loading        73   0.497   0.511 0.193    0.174     -0.227    0.880  -0.628     1.80 
## 8 discrim          73   1.09    1.01  0.628    0.449     -0.396    3.16    1.10      1.90 
## 9 difficulty       73   0.204   0.354 1.69     1.09      -8.24     2.74   -2.43      8.74
#difficulties
all_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/all items pass rate.png") # create folder named "figs" and put png in there
#loadings
all_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/all items factor loading.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
plot(all_items_fit, type = "rxx")

rxx_info = plot(all_items_fit, type = "rxx")
all_items_fit_scores = fscores(all_items_fit, full.scores.SE = T)
empirical_rxx(all_items_fit_scores)
##    F1 
## 0.973
#which ability range has reliability > .90?
d_rxx_info = tibble(
z = rxx_info$panel.args[[1]]$x,
rel = rxx_info$panel.args[[1]]$y
)

d_rxx_info %>%
filter(rel > .90) %>%
describe2()
## # A tibble: 2 × 10
##   var       n   mean median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       104 -0.784 -0.784 1.82   2.32   -3.89  2.32   1.20e-16   -1.23 
## 2 rel     104  0.966  0.976 0.0260 0.0222  0.901 0.992 -8.98e- 1   -0.400
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
## # A tibble: 2 × 10
##   var       n   mean median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       133 -0.573 -0.573 2.32   2.95   -4.55  3.41   1.95e-16   -1.23 
## 2 rel     133  0.942  0.963 0.0528 0.0402  0.806 0.992 -1.01e+ 0   -0.169
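
Assuming the standard normal latent density used in the model, the z-ranges where reliability stays above a cutoff translate directly into population coverage:

#share of a standard normal population in the region where reliability exceeds the cutoff
with(d_rxx_info %>% filter(rel > .90), pnorm(max(z)) - pnorm(min(z)))  #about 0.99
with(d_rxx_info %>% filter(rel > .80), pnorm(max(z)) - pnorm(min(z)))  #about 1.00
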
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))

GG_save("figs/reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
## # A tibble: 2 × 10
##   var       n      mean    median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>     <dbl>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z        66 -2.42e-16 -4.44e-16 1.16   1.48   -1.96  1.96  -6.46e-17   -1.25 
## 2 rel      66  9.73e- 1  9.82e- 1 0.0208 0.0150  0.922 0.992 -9.38e- 1   -0.391
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
## # A tibble: 2 × 10
##   var       n      mean    median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>     <dbl>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       100 -2.45e-16 -4.44e-16 1.75   2.24   -2.98  2.98   2.33e-16   -1.24 
## 2 rel     100  9.60e- 1  9.77e- 1 0.0384 0.0208  0.850 0.992 -1.29e+ 0    0.556
#difficulty and g-loading
all_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = all_items_scored$id,
g = all_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#adjust g for age (restricted cubic spline residuals), then standardize relative to the white-only subgroup
d2$g_ageadj = resid(ols(g ~  rcs(age), data = d2)) %>% unname()
## number of knots in rcs defaulting to 5
d2$g_ageadj_z = standardize(d2$g, focal_group = d2$white_only)
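
For readers unfamiliar with focal-group standardization: the assumption here is that standardize() with focal_group centers and scales on the focal group's mean and SD. A rough base-R equivalent (illustrative only; white_only is assumed to be a logical vector):

#z-score g against the white-only subgroup's mean and SD (manual illustration)
focal = d2$white_only %in% TRUE
g_z_manual = (d2$g - mean(d2$g[focal], na.rm = T)) / sd(d2$g[focal], na.rm = T)
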

Good items

all_items_stats$itemstats %>% filter(g_loading < .25)
##            N  mean    sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## frequent 441 0.977 0.149   0.001        -0.003       0.974     0.092   0.157     -3.775
## 2of5_24  383 0.238 0.426   0.190         0.178       0.974     0.212   0.368      1.220
## 3of5_9   383 0.264 0.441   0.212         0.199       0.974     0.248   0.435      1.093
## 3of5_15  383 0.285 0.452  -0.037        -0.050       0.975    -0.045  -0.076      0.919
## 3of5_19  383 0.078 0.269  -0.123        -0.130       0.975    -0.227  -0.396      2.506
good_items_scored = all_items_scored %>% select(-!!(all_items_stats$itemstats %>% filter(g_loading < .25) %>% rownames()))
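
Equivalently, the removal can be written a bit more explicitly with all_of(), which may be easier to read:

#same removal, spelled out
low_loading_items = all_items_stats$itemstats %>% filter(g_loading < .25) %>% rownames()
good_items_scored = all_items_scored %>% select(-all_of(low_loading_items))
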

#refit
good_items_fit = cache_object(filename = "data/good_items_fit.rds", expr = mirt(
good_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
good_items_fit
## 
## Call:
## mirt(data = good_items_scored %>% select(-id), model = 1, itemtype = "2PL", 
##     technical = list(NCYCLES = 2000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 328 EM iterations.
## mirt version: 1.42 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -36583
## Estimated parameters: 438 
## AIC = 74042
## BIC = 75833; SABIC = 74443
good_items_fit %>% summary()
##                                       F1     h2
## a_specific_number                  0.456 0.2083
## a_type_of_brass_instrument         0.877 0.7699
## a_type_of_drapery                  0.652 0.4257
## a_type_of_fabric_101               0.744 0.5532
## a_type_of_fabric_98                0.700 0.4895
## a_type_of_grasshopper              0.803 0.6452
## a_type_of_hat                      0.574 0.3293
## a_type_of_heating_unit             0.498 0.2478
## a_type_of_magistrate_position      0.746 0.5566
## a_type_of_mathematical_operation   0.569 0.3240
## a_type_of_overshoe                 0.495 0.2455
## a_type_of_plant                    0.734 0.5392
## a_type_of_religious_teacher        0.779 0.6073
## a_type_of_rock                     0.321 0.1028
## a_type_of_skirts                   0.642 0.4126
## a_type_of_smokeless_powder         0.467 0.2178
## a_type_of_sound                    0.490 0.2403
## a_type_of_sword                    0.356 0.1265
## a_type_of_volcanic_crater          0.666 0.4441
## a_type_of_wavy_form                0.706 0.4986
## actionable_negligence              0.769 0.5916
## advisory                           0.723 0.5231
## advocate                           0.702 0.4921
## almighty                           0.914 0.8357
## amazement                          0.661 0.4366
## amenability                        0.740 0.5471
## auspices                           0.688 0.4727
## barely_able_to_read_and_write      0.841 0.7067
## because                            0.680 0.4628
## berate                             0.836 0.6993
## blade                              0.783 0.6130
## blend                              0.683 0.4659
## blunder                            0.860 0.7401
## bow                                0.751 0.5647
## brief                              0.432 0.1866
## bring_about                        0.790 0.6240
## carelessly_or_hastily_put_together 0.609 0.3711
## celebration                        0.719 0.5167
## cheat                              0.806 0.6497
## clay_pigeon_shooting               0.767 0.5885
## coarse                             0.776 0.6028
## collect_or_study_insects           0.626 0.3922
## colossal                           0.713 0.5078
## commotion                          0.903 0.8150
## complainer                         0.795 0.6323
## confiscate                         0.909 0.8268
## congratulate                       0.591 0.3491
## contemplate                        0.468 0.2186
## convoy                             0.855 0.7303
## couch                              0.651 0.4239
## crease                             0.762 0.5811
## cunning                            0.639 0.4085
## deceit                             0.575 0.3301
## deliberately                       0.761 0.5790
## deprive                            0.636 0.4043
## detectable                         0.338 0.1142
## diatribes                          0.579 0.3350
## disjoined                          0.311 0.0970
## disrupt                            0.791 0.6253
## distinct                           0.275 0.0756
## divergence                         0.796 0.6334
## dome                               0.552 0.3046
## downfall                           0.775 0.6010
## drink                              0.851 0.7244
## elite                              0.700 0.4896
## embarrassment                      0.602 0.3624
## emphasize                          0.881 0.7753
## empire                             0.505 0.2554
## empty                              0.759 0.5763
## entanglement                       0.691 0.4775
## environment                        0.659 0.4343
## excite                             0.775 0.6010
## flammable                          0.920 0.8466
## flatterer                          0.868 0.7527
## flatteries                         0.646 0.4178
## forever                            0.799 0.6379
## gigantic                           0.763 0.5826
## girl                               0.779 0.6066
## goo                                0.717 0.5147
## goodbye                            0.820 0.6732
## greed                              0.841 0.7073
## groan                              0.851 0.7244
## gruesome                           0.666 0.4434
## guarantee                          0.283 0.0801
## gutter                             0.658 0.4326
## harmfulness                        0.523 0.2739
## hobby                              0.806 0.6502
## hut                                0.811 0.6582
## illness                            0.491 0.2413
## impromptu                          0.660 0.4359
## indescribable                      0.762 0.5802
## intellectual                       0.711 0.5054
## jargon                             0.926 0.8573
## knowledgeable                      0.817 0.6681
## lackadaisical                      0.606 0.3676
## manager                            0.868 0.7529
## meal                               0.792 0.6277
## melodic                            0.729 0.5313
## mutually                           0.873 0.7630
## nonsense                           0.861 0.7410
## nonsensical                        0.872 0.7600
## not_coveted                        0.664 0.4407
## pamper                             0.590 0.3487
## penitentiary                       0.528 0.2784
## perplexing                         0.691 0.4768
## persistence                        0.638 0.4075
## predetermine                       0.542 0.2940
## pretender                          0.857 0.7348
## questioning                        0.662 0.4380
## quickly                            0.657 0.4321
## rebellious                         0.803 0.6452
## referee                            0.772 0.5961
## referendum                         0.525 0.2760
## relating_to_the_right              0.350 0.1227
## relevant                           0.699 0.4890
## respectful                         0.771 0.5937
## retailer                           0.601 0.3612
## retract                            0.524 0.2751
## ropes                              0.799 0.6381
## sag                                0.596 0.3548
## schemer                            0.669 0.4478
## seize                              0.793 0.6295
## sensitivity                        0.717 0.5145
## shadows                            0.658 0.4333
## sketch                             0.655 0.4284
## slang                              0.803 0.6442
## slave                              0.481 0.2313
## sluggish                           0.634 0.4020
## somber                             0.606 0.3669
## spinelessness                      0.632 0.4000
## sporadic                           0.832 0.6916
## squad                              0.975 0.9509
## stagger                            0.652 0.4256
## stinking                           0.877 0.7686
## stroll                             0.872 0.7606
## stubborn_100                       0.552 0.3050
## stubborn_143                       0.654 0.4282
## stylish                            0.693 0.4796
## summit                             0.756 0.5713
## terminology                        0.830 0.6882
## the_science_of_speech_sounds       0.751 0.5646
## transportation                     0.298 0.0890
## tyrant                             0.439 0.1932
## unhealthful                        0.419 0.1756
## vile                               0.881 0.7770
## vulgar                             0.720 0.5181
## wandering                          0.415 0.1724
## warning                            0.637 0.4063
## wave                               0.721 0.5199
## weaponry                           0.676 0.4571
## 1of5_1                             0.440 0.1938
## 1of5_2                             0.289 0.0838
## 1of5_3                             0.378 0.1431
## 1of5_4                             0.311 0.0969
## 2of5_1                             0.411 0.1685
## 2of5_2                             0.536 0.2872
## 2of5_3                             0.581 0.3376
## 2of5_4                             0.619 0.3830
## 2of5_5                             0.521 0.2714
## 2of5_6                             0.736 0.5414
## 2of5_7                             0.375 0.1406
## 2of5_8                             0.585 0.3427
## 2of5_9                             0.554 0.3074
## 2of5_10                            0.378 0.1428
## 2of5_11                            0.371 0.1373
## 2of5_12                            0.418 0.1749
## 2of5_13                            0.491 0.2410
## 2of5_14                            0.402 0.1612
## 2of5_15                            0.677 0.4578
## 2of5_16                            0.574 0.3299
## 2of5_17                            0.679 0.4617
## 2of5_18                            0.735 0.5400
## 2of5_19                            0.447 0.1997
## 2of5_20                            0.384 0.1477
## 2of5_21                            0.514 0.2646
## 2of5_22                            0.881 0.7754
## 2of5_23                            0.310 0.0964
## 2of5_25                            0.734 0.5394
## 2of5_26                            0.711 0.5058
## 2of5_27                            0.582 0.3385
## 2of5_28                            0.275 0.0755
## 2of5_29                            0.548 0.3007
## 2of5_30                            0.552 0.3047
## 2of5_31                            0.538 0.2900
## 2of5_32                            0.627 0.3926
## 2of5_33                            0.333 0.1111
## 2of5_34                            0.453 0.2055
## 2of5_35                            0.295 0.0871
## 2of5_36                            0.764 0.5831
## 2of5_37                            0.856 0.7335
## 3of5_1                             0.271 0.0732
## 3of5_2                             0.494 0.2441
## 3of5_3                             0.476 0.2263
## 3of5_4                             0.707 0.4992
## 3of5_5                             0.329 0.1082
## 3of5_6                             0.750 0.5630
## 3of5_7                             0.594 0.3526
## 3of5_8                             0.411 0.1690
## 3of5_10                            0.850 0.7228
## 3of5_11                            0.723 0.5220
## 3of5_12                            0.546 0.2980
## 3of5_13                            0.365 0.1330
## 3of5_14                            0.508 0.2577
## 3of5_16                            0.326 0.1065
## 3of5_17                            0.386 0.1486
## 3of5_18                            0.319 0.1018
## 3of5_20                            0.553 0.3056
## 3of5_21                            0.422 0.1781
## 3of5_22                            0.443 0.1960
## 3of5_23                            0.860 0.7398
## 3of5_24                            0.549 0.3018
## 3of5_25                            0.526 0.2766
## 3of5_26                            0.709 0.5033
## 3of5_27                            0.596 0.3551
## 3of5_28                            0.413 0.1709
## 3of5_29                            0.541 0.2926
## 3of5_30                            0.510 0.2606
## 3of5_31                            0.426 0.1812
## 3of5_32                            0.576 0.3316
## 
## SS loadings:  94.3 
## Proportion Var:  0.431 
## 
## Factor correlations: 
## 
##    F1
## F1  1
good_items_stats = itemstats(good_items_scored %>% select(-id))
good_items_stats$itemstats$g_loading = good_items_fit %>% summary() %>% .$rotF %>% as.vector()
good_items_stats$itemstats$discrim = coef(good_items_fit, simplify = T)$items[, 1] %>% unname()
good_items_stats$itemstats$difficulty = -coef(good_items_fit, simplify = T)$items[, 2] %>% unname()
good_items_stats$itemstats
##                                      N  mean    sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## a_specific_number                  441 0.841 0.366   0.257         0.247       0.975     0.456   0.873     -1.901
## a_type_of_brass_instrument         441 0.932 0.252   0.387         0.381       0.975     0.877   3.113     -4.945
## a_type_of_drapery                  441 0.823 0.382   0.389         0.380       0.975     0.652   1.465     -2.098
## a_type_of_fabric_101               441 0.934 0.248   0.320         0.314       0.975     0.744   1.894     -3.742
## a_type_of_fabric_98                441 0.755 0.431   0.479         0.470       0.975     0.700   1.667     -1.707
## a_type_of_grasshopper              441 0.821 0.384   0.510         0.502       0.975     0.803   2.295     -2.683
## a_type_of_hat                      441 0.356 0.479   0.468         0.456       0.975     0.574   1.192      0.723
## a_type_of_heating_unit             441 0.871 0.336   0.248         0.239       0.975     0.498   0.977     -2.215
## a_type_of_magistrate_position      441 0.898 0.303   0.357         0.350       0.975     0.746   1.907     -3.199
## a_type_of_mathematical_operation   441 0.921 0.271   0.263         0.255       0.975     0.569   1.178     -2.927
## a_type_of_overshoe                 441 0.635 0.482   0.396         0.384       0.975     0.495   0.971     -0.690
## a_type_of_plant                    441 0.862 0.346   0.422         0.414       0.975     0.734   1.841     -2.726
## a_type_of_religious_teacher        441 0.855 0.353   0.462         0.454       0.975     0.779   2.117     -2.878
## a_type_of_rock                     441 0.948 0.223   0.100         0.094       0.975     0.321   0.576     -3.036
## a_type_of_skirts                   441 0.828 0.378   0.386         0.377       0.975     0.642   1.426     -2.111
## a_type_of_smokeless_powder         441 0.753 0.432   0.301         0.290       0.975     0.467   0.898     -1.306
## a_type_of_sound                    441 0.680 0.467   0.364         0.353       0.975     0.490   0.957     -0.921
## a_type_of_sword                    441 0.599 0.491   0.258         0.245       0.975     0.356   0.648     -0.449
## a_type_of_volcanic_crater          441 0.855 0.353   0.380         0.371       0.975     0.666   1.521     -2.417
## a_type_of_wavy_form                441 0.615 0.487   0.549         0.539       0.975     0.706   1.697     -0.806
## actionable_negligence              441 0.841 0.366   0.449         0.440       0.975     0.769   2.048     -2.685
## advisory                           441 0.959 0.198   0.242         0.237       0.975     0.723   1.783     -4.193
## advocate                           441 0.751 0.433   0.502         0.492       0.975     0.702   1.675     -1.680
## almighty                           441 0.961 0.193   0.354         0.349       0.975     0.914   3.839     -6.568
## amazement                          441 0.918 0.274   0.305         0.298       0.975     0.661   1.498     -3.137
## amenability                        441 0.832 0.374   0.422         0.413       0.975     0.740   1.871     -2.462
## auspices                           441 0.494 0.501   0.558         0.548       0.975     0.688   1.611     -0.062
## barely_able_to_read_and_write      441 0.880 0.326   0.473         0.466       0.975     0.841   2.642     -3.630
## because                            441 0.912 0.284   0.284         0.277       0.975     0.680   1.580     -3.105
## berate                             441 0.857 0.350   0.451         0.443       0.975     0.836   2.596     -3.314
## blade                              441 0.939 0.240   0.342         0.336       0.975     0.783   2.142     -4.068
## blend                              441 0.912 0.284   0.329         0.321       0.975     0.683   1.590     -3.114
## blunder                            441 0.961 0.193   0.230         0.225       0.975     0.860   2.872     -5.416
## bow                                441 0.596 0.491   0.595         0.586       0.975     0.751   1.938     -0.774
## brief                              441 0.571 0.495   0.429         0.417       0.975     0.432   0.815     -0.350
## bring_about                        441 0.868 0.338   0.438         0.430       0.975     0.790   2.192     -3.090
## carelessly_or_hastily_put_together 441 0.866 0.341   0.312         0.304       0.975     0.609   1.308     -2.377
## celebration                        441 0.757 0.429   0.477         0.467       0.975     0.719   1.760     -1.779
## cheat                              441 0.723 0.448   0.577         0.569       0.975     0.806   2.318     -1.864
## clay_pigeon_shooting               441 0.943 0.232   0.292         0.286       0.975     0.767   2.035     -4.057
## coarse                             441 0.941 0.236   0.340         0.333       0.975     0.776   2.097     -4.069
## collect_or_study_insects           441 0.932 0.252   0.272         0.265       0.975     0.626   1.367     -3.247
## colossal                           441 0.823 0.382   0.419         0.410       0.975     0.713   1.729     -2.278
## commotion                          441 0.853 0.355   0.563         0.556       0.975     0.903   3.572     -4.156
## complainer                         441 0.839 0.368   0.483         0.475       0.975     0.795   2.232     -2.810
## confiscate                         441 0.896 0.306   0.483         0.476       0.975     0.909   3.718     -4.932
## congratulate                       441 0.887 0.317   0.294         0.285       0.975     0.591   1.246     -2.546
## contemplate                        441 0.902 0.297   0.242         0.234       0.975     0.468   0.900     -2.509
## convoy                             441 0.764 0.425   0.594         0.586       0.975     0.855   2.801     -2.538
## couch                              441 0.757 0.429   0.434         0.424       0.975     0.651   1.460     -1.605
## crease                             441 0.830 0.376   0.470         0.461       0.975     0.762   2.005     -2.542
## cunning                            441 0.796 0.403   0.369         0.359       0.975     0.639   1.414     -1.854
## deceit                             441 0.746 0.436   0.391         0.380       0.975     0.575   1.195     -1.395
## deliberately                       441 0.812 0.391   0.479         0.471       0.975     0.761   1.996     -2.372
## deprive                            441 0.837 0.370   0.386         0.377       0.975     0.636   1.402     -2.171
## detectable                         441 0.966 0.181   0.103         0.097       0.975     0.338   0.611     -3.504
## diatribes                          441 0.624 0.485   0.453         0.441       0.975     0.579   1.208     -0.699
## disjoined                          441 0.494 0.501   0.272         0.259       0.975     0.311   0.558      0.016
## disrupt                            441 0.959 0.198   0.274         0.269       0.975     0.791   2.198     -4.609
## distinct                           441 0.857 0.350   0.186         0.176       0.975     0.275   0.487     -1.873
## divergence                         441 0.698 0.459   0.591         0.582       0.975     0.796   2.237     -1.624
## dome                               441 0.744 0.437   0.393         0.383       0.975     0.552   1.126     -1.349
## downfall                           441 0.898 0.303   0.407         0.400       0.975     0.775   2.089     -3.359
## drink                              441 0.769 0.422   0.558         0.550       0.975     0.851   2.759     -2.550
## elite                              441 0.853 0.355   0.384         0.376       0.975     0.700   1.667     -2.501
## embarrassment                      441 0.875 0.331   0.313         0.304       0.975     0.602   1.283     -2.451
## emphasize                          441 0.966 0.181   0.286         0.282       0.975     0.881   3.161     -5.909
## empire                             441 0.816 0.388   0.317         0.307       0.975     0.505   0.997     -1.771
## empty                              441 0.848 0.359   0.448         0.439       0.975     0.759   1.985     -2.702
## entanglement                       441 0.542 0.499   0.539         0.528       0.975     0.691   1.627     -0.349
## environment                        441 0.503 0.501   0.541         0.531       0.975     0.659   1.491     -0.101
## excite                             441 0.707 0.455   0.581         0.572       0.975     0.775   2.089     -1.608
## flammable                          441 0.986 0.116   0.223         0.220       0.975     0.920   3.999     -8.102
## flatterer                          441 0.844 0.364   0.558         0.551       0.975     0.868   2.969     -3.489
## flatteries                         441 0.732 0.443   0.482         0.472       0.975     0.646   1.442     -1.430
## forever                            441 0.864 0.343   0.453         0.445       0.975     0.799   2.259     -3.096
## gigantic                           441 0.952 0.213   0.242         0.237       0.975     0.763   2.011     -4.239
## girl                               441 0.875 0.331   0.418         0.410       0.975     0.779   2.113     -3.100
## goo                                441 0.971 0.169   0.219         0.215       0.975     0.717   1.753     -4.524
## goodbye                            441 0.948 0.223   0.326         0.320       0.975     0.820   2.443     -4.572
## greed                              441 0.649 0.478   0.668         0.660       0.975     0.841   2.646     -1.450
## groan                              441 0.912 0.284   0.391         0.384       0.975     0.851   2.760     -4.202
## gruesome                           441 0.834 0.372   0.392         0.383       0.975     0.666   1.519     -2.230
## guarantee                          441 0.930 0.256   0.108         0.100       0.975     0.283   0.502     -2.683
## gutter                             441 0.941 0.236   0.223         0.217       0.975     0.658   1.486     -3.509
## harmfulness                        441 0.868 0.338   0.312         0.303       0.975     0.523   1.045     -2.233
## hobby                              441 0.880 0.326   0.443         0.436       0.975     0.806   2.321     -3.336
## hut                                441 0.889 0.315   0.454         0.446       0.975     0.811   2.362     -3.488
## illness                            441 0.717 0.451   0.334         0.323       0.975     0.491   0.960     -1.120
## impromptu                          441 0.626 0.484   0.513         0.503       0.975     0.660   1.496     -0.804
## indescribable                      441 0.955 0.208   0.291         0.286       0.975     0.762   2.001     -4.286
## intellectual                       441 0.943 0.232   0.296         0.290       0.975     0.711   1.720     -3.759
## jargon                             441 0.959 0.198   0.346         0.341       0.975     0.926   4.172     -6.900
## knowledgeable                      441 0.823 0.382   0.513         0.505       0.975     0.817   2.415     -2.800
## lackadaisical                      441 0.741 0.438   0.416         0.405       0.975     0.606   1.298     -1.416
## manager                            441 0.986 0.116   0.192         0.189       0.975     0.868   2.971     -6.748
## meal                               441 0.610 0.488   0.627         0.619       0.975     0.792   2.210     -0.965
## melodic                            441 0.873 0.333   0.422         0.414       0.975     0.729   1.812     -2.823
## mutually                           441 0.966 0.181   0.310         0.305       0.975     0.873   3.054     -5.781
## nonsense                           441 0.873 0.333   0.493         0.486       0.975     0.861   2.879     -3.766
## nonsensical                        441 0.973 0.163   0.306         0.301       0.975     0.872   3.029     -6.021
## not_coveted                        441 0.853 0.355   0.371         0.362       0.975     0.664   1.511     -2.388
## pamper                             441 0.467 0.499   0.452         0.441       0.975     0.590   1.245      0.118
## penitentiary                       441 0.918 0.274   0.241         0.234       0.975     0.528   1.057     -2.811
## perplexing                         441 0.905 0.294   0.376         0.369       0.975     0.691   1.625     -3.050
## persistence                        441 0.862 0.346   0.350         0.341       0.975     0.638   1.412     -2.405
## predetermine                       441 0.916 0.278   0.256         0.249       0.975     0.542   1.098     -2.807
## pretender                          441 0.991 0.095   0.139         0.136       0.975     0.857   2.833     -7.019
## questioning                        441 0.900 0.300   0.343         0.335       0.975     0.662   1.502     -2.894
## quickly                            441 0.789 0.408   0.400         0.391       0.975     0.657   1.485     -1.845
## rebellious                         441 0.832 0.374   0.488         0.480       0.975     0.803   2.295     -2.793
##  [ reached 'max' / getOption("max.print") -- omitted 108 rows ]
good_items_stats$itemstats %>%
describe2()
## # A tibble: 9 × 10
##   var               n    mean  median         sd       mad      min     max    skew kurtosis
##   <chr>         <dbl>   <dbl>   <dbl>      <dbl>     <dbl>    <dbl>   <dbl>   <dbl>    <dbl>
## 1 N               219 423.    441     27.0       0         383      441     -0.791   -1.38  
## 2 mean            219   0.692   0.757  0.232     0.229       0.0992   0.997 -0.714   -0.618 
## 3 sd              219   0.386   0.405  0.104     0.110       0.0511   0.501 -0.892    0.124 
## 4 total.r         219   0.391   0.396  0.119     0.124       0.0999   0.725  0.0355  -0.280 
## 5 total.r_if_rm   219   0.382   0.384  0.119     0.123       0.0942   0.718  0.0607  -0.294 
## 6 alpha_if_rm     219   0.975   0.975  0.0000854 0.0000882   0.975    0.975 -0.498   -0.0909
## 7 g_loading       219   0.634   0.658  0.170     0.180       0.271    0.975 -0.346   -0.798 
## 8 discrim         219   1.62    1.49   0.861     0.763       0.478    7.49   1.95     8.85  
## 9 difficulty      219  -1.75   -1.71   2.22      2.06      -14.3      2.74  -1.10     3.99
#difficulties
good_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items pass rate.png")
#loadings
good_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items factor loading.png")
#reliability
good_items_fit_scores = fscores(good_items_fit, full.scores.SE = T)
empirical_rxx(good_items_fit_scores)
##    F1 
## 0.972
#which range has >.90?
d_rxx_info = rxx_info = get_reliabilities(good_items_fit)

d_rxx_info %>%
filter(rel > .90) %>%
describe2()
## # A tibble: 2 × 10
##   var       n   mean median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       104 -0.784 -0.784 1.82   2.32   -3.89  2.32   1.20e-16   -1.23 
## 2 rel     104  0.966  0.976 0.0263 0.0224  0.900 0.992 -9.02e- 1   -0.391
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
## # A tibble: 2 × 10
##   var       n   mean median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       133 -0.573 -0.573 2.32   2.95   -4.55  3.41   1.95e-16   -1.23 
## 2 rel     133  0.942  0.963 0.0536 0.0407  0.803 0.992 -1.01e+ 0   -0.152
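Since the latent trait is on a standard normal scale, these z ranges translate directly into population coverage. A quick worked check using the min/max z values from the two summaries above (assuming the reliable region is contiguous):

#share of a standard-normal population covered by the range with reliability > .90
pnorm(2.32) - pnorm(-3.89)  # ~0.99
#and by the range with reliability > .80
pnorm(3.41) - pnorm(-4.55)  # ~1.00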
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))

GG_save("figs/good items reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
## # A tibble: 2 × 10
##   var       n      mean    median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>     <dbl>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z        66 -2.42e-16 -4.44e-16 1.16   1.48   -1.96  1.96  -6.46e-17   -1.25 
## 2 rel      66  9.73e- 1  9.82e- 1 0.0210 0.0151  0.921 0.992 -9.41e- 1   -0.384
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
## # A tibble: 2 × 10
##   var       n      mean    median     sd    mad    min   max      skew kurtosis
##   <chr> <dbl>     <dbl>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1 z       100 -2.45e-16 -4.44e-16 1.75   2.24   -2.98  2.98   2.33e-16   -1.24 
## 2 rel     100  9.60e- 1  9.77e- 1 0.0389 0.0209  0.848 0.992 -1.30e+ 0    0.574
#difficulty and g-loading
good_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/good items scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = good_items_scored$id,
g = good_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#Norming, the age problem
GG_scatter(d2, "age", "g") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#scores by age group
d2$age_group = discretize(d2$age, 3)

GG_group_means(d2, "g", "age_group", type = "boxplot") +
scale_y_continuous("Vocabulary IRT score") +
scale_x_discrete("Age group")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items boxplots by age.png")
describe2(d2$g, d2$age_group)
## New names:
## • `` -> `...1`
## # A tibble: 3 × 11
##   group       var       n    mean  median    sd   mad   min   max  skew kurtosis
##   <fct>       <chr> <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 [18.9,40.3] ...1    176 -0.390  -0.481  0.832 0.841 -2.27  1.81 0.592   -0.194
## 2 (40.3,61.7] ...1    174  0.0964 -0.0615 0.957 1.09  -1.76  2.55 0.460   -0.685
## 3 (61.7,83.1] ...1     91  0.562   0.529  1.02  1.03  -1.50  3.15 0.184   -0.191
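For scale, the gap between the oldest and youngest age tertile can be read off the table above; a quick worked difference of the two group means, in IRT score units:

#oldest minus youngest tertile mean
0.562 - (-0.390)  # ~0.95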
#do whites differ in age?
describe2(d2$age, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
##   group var       n  mean median    sd   mad   min   max   skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
## 1 FALSE ...1    114  41.3     39  14.8  18.5    21    74  0.312    -1.09
## 2 TRUE  ...1    327  48.5     51  15.7  17.8    19    83 -0.121    -1.07
#white subset is easier to work with
d2_white = d2 %>% filter(white_only)
(white_ageadj_model = lm(g ~ age, data = d2_white))
## 
## Call:
## lm(formula = g ~ age, data = d2_white)
## 
## Coefficients:
## (Intercept)          age  
##     -1.1010       0.0251
#get resids, step 1
d2_white$g_ageadj1 = resid(white_ageadj_model)
(ageadj_desc_whites = describe2(d2_white$g_ageadj1))
## # A tibble: 1 × 10
##   var       n      mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>     <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 -2.57e-17 -0.0648 0.901 0.958 -2.26  2.60 0.320   -0.281
#an alternative is to model the absolute residuals directly
(absSD_ols_whites = lm(abs(g_ageadj1) ~ age, data = d2_white))
## 
## Call:
## lm(formula = abs(g_ageadj1) ~ age, data = d2_white)
## 
## Coefficients:
## (Intercept)          age  
##     0.46724      0.00544
#get age mean and SD adjusted scores
d2_white$g_ageadj2 = d2_white$g_ageadj1 / predict(absSD_ols_whites)

#check whether the adjustment worked
d2_white %>%
GG_scatter("age", "g_ageadj2")
## `geom_smooth()` using formula = 'y ~ x'

test_HS(d2_white$g_ageadj2, d2_white$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
##   test           r2adj     p fit          log10_p
##   <chr>          <dbl> <dbl> <named list>   <dbl>
## 1 linear raw  -0.00300 0.872 <ols>         0.0594
## 2 spline raw  -0.00246 0.361 <ols>         0.443 
## 3 linear rank -0.00305 0.924 <ols>         0.0343
## 4 spline rank  0.00330 0.164 <ols>         0.785
#restore to white z score norms
(white_desc_ageadj2_desc = describe2(d2_white$g_ageadj2))
## # A tibble: 1 × 10
##   var       n     mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 0.000443 -0.0814  1.22  1.33 -2.59  3.73 0.397   -0.350
d2_white$g_ageadj3 = (d2_white$g_ageadj2 - white_desc_ageadj2_desc$mean) / white_desc_ageadj2_desc$sd
d2_white$g_ageadj3 %>% describe2()
## # A tibble: 1 × 10
##   var       n     mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 1.50e-17 -0.0668     1  1.08 -2.12  3.05 0.397   -0.350
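The three steps above (remove the linear age trend in the mean, rescale by the age-predicted absolute residual, re-standardize) can be wrapped into a single helper. A minimal sketch; the function name is illustrative and not part of any package:

#illustrative wrapper for the age adjustment used above (not a package function)
age_adjust_scores = function(score, age) {
  d = data.frame(score = score, age = age)
  mean_fit = lm(score ~ age, data = d)      #step 1: remove the linear mean trend
  d$resid1 = resid(mean_fit)
  sd_fit = lm(abs(resid1) ~ age, data = d)  #step 2: model the variance trend
  d$resid2 = d$resid1 / predict(sd_fit)
  as.vector(scale(d$resid2))                #step 3: re-standardize to z-scores
}

#should reproduce g_ageadj3 up to numerical details, e.g.:
#all.equal(age_adjust_scores(d2_white$g, d2_white$age), d2_white$g_ageadj3)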
#Finally, we can compute IQs using a norming function
vocab_norms = kirkegaard::make_norms(
score = d2$g,
age = d2$age,
norm_group = d2$race_combos_common == "white"
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
d2$IQ = vocab_norms$data$IQ

#plot results to see if they make sense
d2 %>%
GG_denhist("IQ", "white_only")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/good items IQ scores by Whiteness.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
describe2(d2$IQ, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
##   group var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 FALSE ...1    114  95.2   92.6  16.1  15.8  62.0  149. 0.717    0.166
## 2 TRUE  ...1    327 100     99.0  15    16.3  68.2  146. 0.397   -0.350
#should be no age relationship within groups
d2 %>%
GG_scatter("age", "IQ")
## `geom_smooth()` using formula = 'y ~ x'

#and no heteroscedasticity
test_HS(d2$IQ, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
##   test           r2adj     p fit          log10_p
##   <chr>          <dbl> <dbl> <named list>   <dbl>
## 1 linear raw  -0.00226 0.937 <ols>         0.0282
## 2 spline raw  -0.00808 0.925 <ols>         0.0339
## 3 linear rank -0.00211 0.786 <ols>         0.105 
## 4 spline rank -0.00674 0.803 <ols>         0.0954

Validation items

d2_mmpi = d2 %>% select(I_am_easily_awakened_by_noise:I_like_movie_love_scenes) %>%
map_df(~mapvalues(., from = c("Yes", "No"), to = c(1, 0))) %>%
map_df(as.numeric)

#IQ means by MMPI
MMPI_IQ_means = map2_df(d2_mmpi, names(d2_mmpi), function(x, y) {
# browser()
desc = suppressMessages(describe2(d2$IQ, group = x))

tibble(
question = d_vars %>% filter(var_name == y) %>% pull(label),
yes = desc$mean[2],
no = desc$mean[1],
IQ_gap = yes-no,
abs_IQ_gap = abs(IQ_gap)
)
})

GG_scatter(d2, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "IQ") +
scale_x_continuous("Answer to 'How many items on the test you just took do you think you got correct?'")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/self-estimate vs. IQ.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d2, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "IQ")
## `geom_smooth()` using formula = 'y ~ x'

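#compare the correlations of IQ with the two self-estimate items (treated here as independent correlations)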
paired.r(
cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, use = "pair"),
cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, use = "pair"),
n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 1]
)
## Call: paired.r(xy = cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, 
##     use = "pair"), xz = cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, 
##     use = "pair"), n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 
##     1])
## [1] "test of difference between two independent correlations"
## z = 3.12  With probability =  0
describeBy(d2$IQ, d2$I_was_a_slow_learner_in_school)
## 
##  Descriptive statistics by group 
## group: No
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 385 99.6 15.5   98.9    98.9  17  62 149  87.2 0.39    -0.28 0.79
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars  n mean   sd median trimmed  mad  min max range skew kurtosis  se
## X1    1 56 92.8 13.5   89.7    91.4 11.2 70.3 132  61.9 0.96     0.44 1.8
describeBy(d2$IQ, d2$I_like_to_read_about_science)
## 
##  Descriptive statistics by group 
## group: No
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis  se
## X1    1 98 92.5 13.9   90.6    91.1 12.7  67 133  66.2 0.84     0.34 1.4
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 343  101 15.4   99.6    99.9 16.8  62 149  87.2 0.36    -0.28 0.83
describeBy(d2$IQ, d2$A_person_shouldn_t_be_punished_for_breaking_a_law_that_he_thinks_is_unreasonable)
## 
##  Descriptive statistics by group 
## group: No
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 392 99.4 15.6   98.5    98.5 17.6  62 149  87.2 0.45    -0.35 0.79
## ------------------------------------------------------------------------------------------------ 
## group: Yes
##    vars  n mean   sd median trimmed  mad  min max range  skew kurtosis   se
## X1    1 49 93.2 12.2   94.6    93.5 14.5 68.2 116  47.7 -0.17     -0.9 1.74
GG_scatter(d2, "Total_approvals", "g") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Sex DIF

# Sex bias
sex_bias_fit = cache_object(filename = "data/sex_bias_fit.rds", expr = DIF_test(
items = good_items_scored %>% select(-id),
model = 1,
group = d2$sex,
technical = list(NCYCLES = 2000)
))
## Cache found, reading object from disk
#DIF items
sex_bias_fit$DIF_stats %>% filter(p < .05)
##                             item      groups converged    AIC   SABIC      HQ     BIC     X2 df     p number p_adj
## 1              a_type_of_drapery Male,Female      TRUE  -3.64  -1.810  -0.415   4.537  7.641  2 0.022      3 1.000
## 2           a_type_of_fabric_101 Male,Female      TRUE -10.43  -8.602  -7.207  -2.255 14.433  2 0.001      4 0.161
## 3            a_type_of_fabric_98 Male,Female      TRUE  -5.98  -4.151  -2.756   2.196  9.982  2 0.007      5 1.000
## 4                  a_type_of_hat Male,Female      TRUE  -8.22  -6.389  -4.994  -0.042  12.22  2 0.002      7 0.486
## 5                a_type_of_plant Male,Female      TRUE  -4.12  -2.289  -0.894   4.058   8.12  2 0.017     12 1.000
## 6               a_type_of_skirts Male,Female      TRUE -14.63 -12.804 -11.409  -6.457 18.635  2     0     15 0.020
## 7                       advocate Male,Female      TRUE  -2.41  -0.575   0.820   5.772  6.406  2 0.041     23 1.000
## 8                      amazement Male,Female      TRUE  -4.64  -2.805  -1.410   3.542  8.636  2 0.013     25 1.000
## 9                       auspices Male,Female      TRUE  -6.29  -4.454  -3.059   1.893 10.285  2 0.006     27 1.000
## 10 barely_able_to_read_and_write Male,Female      TRUE  -3.98  -2.149  -0.754   4.199   7.98  2 0.019     28 1.000
## 11                       because Male,Female      TRUE  -4.75  -2.917  -1.522   3.430  8.748  2 0.013     29 1.000
## 12                   celebration Male,Female      TRUE  -8.25  -6.419  -5.024  -0.072  12.25  2 0.002     38 0.479
## 13                      colossal Male,Female      TRUE -20.06 -18.229 -16.834 -11.882  24.06  2     0     43 0.001
## 14                        convoy Male,Female      TRUE  -5.86  -4.028  -2.633   2.319  9.859  2 0.007     49 1.000
## 15                         couch Male,Female      TRUE  -2.10  -0.274   1.121   6.073  6.105  2 0.047     50 1.000
## 16                         drink Male,Female      TRUE  -8.87  -7.034  -5.639  -0.687 12.865  2 0.002     64 0.352
## 17                          girl Male,Female      TRUE  -5.48  -3.646  -2.251   2.701  9.477  2 0.009     78 1.000
## 18                         greed Male,Female      TRUE  -3.55  -1.720  -0.325   4.627  7.551  2 0.023     81 1.000
## 19                       illness Male,Female      TRUE -10.90  -9.073  -7.678  -2.726 14.904  2 0.001     89 0.127
## 20                  intellectual Male,Female      TRUE  -4.14  -2.313  -0.918   4.034  8.144  2 0.017     92 1.000
## 21                      nonsense Male,Female      TRUE  -4.39  -2.558  -1.163   3.789  8.389  2 0.015    100 1.000
## 22                        pamper Male,Female      TRUE  -7.74  -5.908  -4.513   0.439 11.739  2 0.003    103 0.619
## 23                    rebellious Male,Female      TRUE -15.40 -13.565 -12.170  -7.218 19.396  2     0    111 0.013
## 24                    referendum Male,Female      TRUE  -4.25  -2.421  -1.026   3.926  8.252  2 0.016    113 1.000
## 25                        sketch Male,Female      TRUE  -4.11  -2.282  -0.887   4.065  8.113  2 0.017    125 1.000
## 26                        somber Male,Female      TRUE  -5.06  -3.232  -1.837   3.115  9.063  2 0.011    129 1.000
## 27                      sporadic Male,Female      TRUE  -4.60  -2.773  -1.378   3.574  8.604  2 0.014    131 1.000
## 28                        stroll Male,Female      TRUE  -2.18  -0.351   1.044   5.996  6.182  2 0.045    135 1.000
## 29                       stylish Male,Female      TRUE  -6.68  -4.850  -3.455   1.497 10.681  2 0.005    138 1.000
## 30                        vulgar Male,Female      TRUE  -4.48  -2.649  -1.254   3.698   8.48  2 0.014    146 1.000
## 31                          wave Male,Female      TRUE  -4.26  -2.433  -1.038   3.914  8.264  2 0.016    149 1.000
## 32                        2of5_1 Male,Female      TRUE  -2.11  -0.279   1.116   6.068   6.11  2 0.047    155 1.000
## 33                        2of5_5 Male,Female      TRUE  -6.08  -4.251  -2.856   2.096 10.082  2 0.006    159 1.000
## 34                       2of5_12 Male,Female      TRUE  -4.92  -3.087  -1.692   3.260  8.918  2 0.012    166 1.000
## 35                       2of5_13 Male,Female      TRUE  -2.65  -0.822   0.573   5.525  6.653  2 0.036    167 1.000
## 36                       2of5_14 Male,Female      TRUE  -9.58  -7.753  -6.358  -1.406 13.584  2 0.001    168 0.246
## 37                       2of5_17 Male,Female      TRUE  -5.99  -4.160  -2.765   2.188  9.991  2 0.007    171 1.000
## 38                       2of5_30 Male,Female      TRUE  -5.65  -3.817  -2.422   2.530  9.648  2 0.008    183 1.000
## 39                       2of5_31 Male,Female      TRUE  -3.33  -1.496  -0.101   4.851  7.327  2 0.026    184 1.000
## 40                        3of5_8 Male,Female      TRUE  -6.22  -4.390  -2.995   1.957 10.221  2 0.006    198 1.000
## 41                       3of5_10 Male,Female      TRUE  -2.90  -1.065   0.330   5.282  6.896  2 0.032    199 1.000
## 42                       3of5_21 Male,Female      TRUE -15.70 -13.866 -12.471  -7.519 19.697  2     0    208 0.012
## 43                       3of5_27 Male,Female      TRUE  -3.29  -1.454  -0.059   4.893  7.285  2 0.026    214 1.000
## 44                       3of5_28 Male,Female      TRUE  -2.43  -0.599   0.796   5.748   6.43  2  0.04    215 1.000
## 45                       3of5_29 Male,Female      TRUE -12.31 -10.474  -9.080  -4.127 16.306  2     0    216 0.063
sex_bias_fit$DIF_stats %>% filter(p_adj < .05)
##               item      groups converged   AIC SABIC    HQ    BIC     X2 df p number p_adj
## 1 a_type_of_skirts Male,Female      TRUE -14.6 -12.8 -11.4  -6.46 18.635  2 0     15 0.020
## 2         colossal Male,Female      TRUE -20.1 -18.2 -16.8 -11.88  24.06  2 0     43 0.001
## 3       rebellious Male,Female      TRUE -15.4 -13.6 -12.2  -7.22 19.396  2 0    111 0.013
## 4          3of5_21 Male,Female      TRUE -15.7 -13.9 -12.5  -7.52 19.697  2 0    208 0.012
sex_bias_fit$effect_size_test
## $liberal
##           Effect Size   Value
## 1                STDS  0.3903
## 2                UTDS  4.6722
## 3              UETSDS  0.4603
## 4               ETSSD  0.0119
## 5         Starks.DTFR  0.3253
## 6               UDTFR  4.4877
## 7              UETSDN  0.5160
## 8 theta.of.max.test.D -0.2722
## 9           Test.Dmax  0.8981
## 
## $conservative
##           Effect Size    Value
## 1                STDS  0.04344
## 2                UTDS  0.60582
## 3              UETSDS  0.24699
## 4               ETSSD  0.00132
## 5         Starks.DTFR  0.05577
## 6               UDTFR  0.55640
## 7              UETSDN  0.25556
## 8 theta.of.max.test.D -1.46312
## 9           Test.Dmax -0.47072
sex_bias_fit$DIF_stats$item_number = seq_along_rows(sex_bias_fit$DIF_stats)

#plot items
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace")

sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))

save_plot_to_file({
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
}, filename = "figs/good items sex DIF.png")
sex_bias_fit$fits$anchor_liberal %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p < .05) %>% pull(item_number))

#plot bias for test
sex_bias_fit$fits$anchor_conservative %>% plot(type = "score")

#sex difference
SMD_matrix(d2$IQ, d2$sex)
##          Male Female
## Male       NA 0.0734
## Female 0.0734     NA
describe2(d2$IQ, d2$sex)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
##   group  var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct>  <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Male   ...1    199  99.4   98.7  16.2  17.1  62.0  149. 0.424   -0.183
## 2 Female ...1    242  98.2   96.9  14.8  16.2  67.0  146. 0.464   -0.434
var.test(IQ ~ sex, data = d2)
## 
##  F test to compare two variances
## 
## data:  IQ by sex
## F = 1, num df = 198, denom df = 241, p-value = 0.2
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.918 1.565
## sample estimates:
## ratio of variances 
##                1.2
#item level pass rate SD by sex
good_items_stats_sexes = itemstats(
all_items_scored %>% select(-id),
group = d2$sex
)
## Warning in cor(data, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
bind_rows(
good_items_stats_sexes$Male$itemstats %>% mutate(sex = "Men"),
good_items_stats_sexes$Female$itemstats %>% mutate(sex = "Women")
) %>%
GG_denhist("sd", group = "sex")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

t.test(
good_items_stats_sexes$Male$itemstats$sd,
good_items_stats_sexes$Female$itemstats$sd
)
## 
##  Welch Two Sample t-test
## 
## data:  good_items_stats_sexes$Male$itemstats$sd and good_items_stats_sexes$Female$itemstats$sd
## t = 1, df = 441, p-value = 0.3
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.00878  0.03110
## sample estimates:
## mean of x mean of y 
##     0.390     0.378
#white subset
d2_white = d2 %>% filter(race_combos_common == "white")

SMD_matrix(d2_white$IQ, d2_white$sex)
##         Male Female
## Male      NA  0.163
## Female 0.163     NA
describe2(d2_white$IQ, d2_white$sex)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
##   group  var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct>  <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Male   ...1    148 101.    99.8  15.8  16.2  68.2  142. 0.252   -0.422
## 2 Female ...1    179  98.9   97.6  14.3  15.9  71.1  146. 0.511   -0.310
var.test(IQ ~ sex, data = d2_white)
## 
##  F test to compare two variances
## 
## data:  IQ by sex
## F = 1, num df = 147, denom df = 178, p-value = 0.2
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.892 1.658
## sample estimates:
## ratio of variances 
##               1.21
d2 %>%
filter(race_combos_common == "white") %>%
GG_denhist("IQ", "sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Abbreviation
max_items = 50
max_cycles = 2000
plan(multisession(workers = 7))

#filter out items with bad pass rates
good_items_scored_stats = good_items_fit %>% get_mirt_stats()

good_items_passrate_filter = good_items_scored %>% select(all_of(good_items_scored_stats %>% filter(is_between(pass_rate, .05, .95)) %>% pull(item)))
ncol(good_items_passrate_filter)
## [1] 100
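The same filter can also be written directly from the scored item matrix. A rough, approximately equivalent sketch (illustrative only; it assumes pass_rate is simply the proportion passing each item, whereas the analysis uses the mirt-derived item stats and is_between() above):

#approximately equivalent pass rate filter computed straight from the scored items
pass_rates = good_items_scored %>%
  select(-id) %>%
  colMeans(na.rm = T)
keep_items = names(pass_rates)[pass_rates > .05 & pass_rates < .95]
length(keep_items)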
#full fit for filtered items
good_items_passrate_filter_fit = cache_object(filename = "data/good_items_passrate_filter_fit.rds", expr = mirt(
  good_items_passrate_filter,
  model = 1,
  itemtype = "2PL",
  technical = list(NCYCLES = max_cycles)
), renew = renew_all)
## Cache found, reading object from disk
#make norms for 100 item version
norms_100 = make_norms(
  score = good_items_passrate_filter_fit %>% fscores(full.scores.SE = T) %>% extract(, 1),
  age = d2$age,
  norm_group = d2$white_only,
  p_value = .05
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
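The norms put the latent scores on the familiar IQ metric. A minimal sketch of the underlying idea, under the illustrative assumption that scores are standardized against the white norm group and rescaled to mean 100 and SD 15; the age adjustments reported above are handled inside make_norms() and omitted here:

#illustrative only: IQ metric without the age adjustment that make_norms() applies
theta = good_items_passrate_filter_fit %>% fscores(full.scores.SE = T) %>% extract(, 1)
z = (theta - mean(theta[d2$white_only])) / sd(theta[d2$white_only])
iq_unadjusted = 100 + 15 * z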
#forward optimize for reliability
vocab_abbrev_forward = cache_object(filename = "data/vocab_abbrev_rc50.rds", expr = {
  abbreviate_scale(
    items = good_items_passrate_filter,
    item_target = max_items,
    method = "forwards",
    selection_method = "rc",
    mirt_args = list(
      model = 1,
      itemtype = "2PL",
      verbose = F,
      technical = list(NCYCLES = max_cycles)
    )
  )
}, renew = renew_all)
## Cache found, reading object from disk
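Conceptually, the forward method is a greedy search: start from a small seed set and repeatedly add whichever remaining item most improves the criterion. The sketch below illustrates that idea with a hypothetical helper (greedy_forward is not part of abbreviate_scale() or the kirkegaard package), and it optimizes empirical reliability only rather than the combined "rc" criterion used above:

#greedy forward item selection, illustrative sketch only
greedy_forward = function(items, item_target, seed_items, ncycles = 2000) {
  selected = seed_items
  remaining = setdiff(names(items), selected)
  history = tibble(items_in_scale = integer(), reliability = double())

  while (length(selected) < item_target) {
    #refit a 2PL for each candidate set and record its empirical reliability
    rels = map_dbl(remaining, function(candidate) {
      fit = mirt(items[c(selected, candidate)], model = 1, itemtype = "2PL",
                 verbose = F, technical = list(NCYCLES = ncycles))
      empirical_rxx(fscores(fit, full.scores.SE = T))
    })

    best = remaining[which.max(rels)]
    selected = c(selected, best)
    remaining = setdiff(remaining, best)
    history = bind_rows(history,
                        tibble(items_in_scale = length(selected),
                               reliability = max(rels)))
  }

  list(items = selected, history = history)
}

In practice this is expensive (one mirt fit per candidate item per step), which is why the cached result above is reused.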
#max loading
#simple
max_loading_basic = abbreviate_scale(
    items = good_items_passrate_filter,
    item_target = max_items,
    method = "max_loading",
    selection_method = "rc",
    mirt_args = list(
      model = 1,
      itemtype = "2PL",
      verbose = F,
      technical = list(NCYCLES = max_cycles)
    )
  )
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## 67.37 sec elapsed
#balancing
max_loading_balanced = abbreviate_scale(
    items = good_items_passrate_filter,
    item_target = max_items,
    method = "max_loading",
    difficulty_balance_groups = 5,
    selection_method = "rc",
    mirt_args = list(
      model = 1,
      itemtype = "2PL",
      verbose = F,
      technical = list(NCYCLES = max_cycles)
    )
  )
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## 30.87 sec elapsed
#residualization
max_loading_resid = abbreviate_scale(
    items = good_items_passrate_filter,
    item_target = max_items,
    method = "max_loading",
    residualize_loadings = T,
    selection_method = "rc",
    mirt_args = list(
      model = 1,
      itemtype = "2PL",
      verbose = F,
      technical = list(NCYCLES = max_cycles)
    )
  )
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the max_loading method
## 29.92 sec elapsed
#plot results
abbrev_results = bind_rows(
  vocab_abbrev_forward$best_sets %>% mutate(method = "step forward"),
  max_loading_basic$best_sets %>% mutate(method = "max loading, basic"),
  max_loading_balanced$best_sets %>% mutate(method = "max loading, balanced"),
  max_loading_resid$best_sets %>% mutate(method = "max loading, resid")
) %>% 
  select(reliability, r_full_score, method, items_in_scale, criterion_value) %>% 
  pivot_longer(
    cols = c("reliability", "r_full_score", "criterion_value"),
    names_to = "criterion",
    values_to = "value"
  ) %>% 
  mutate(
    criterion = case_when(
      criterion == "reliability" ~ "Reliability",
      criterion == "r_full_score" ~ "Cor. with full score",
      criterion == "criterion_value" ~ "Combined index"
      )
    )

#plot it
abbrev_results %>%
  ggplot(aes(items_in_scale, value, color = method)) +
  geom_line() +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .1)) +
  # geom_point() +
  facet_wrap("criterion")

GG_save("figs/abbreviation_comparison.png")
#make norms for abbreviated scales
abbrev_scales_items = seq(10, 50, 5)
abbrev_scales_norms = map(abbrev_scales_items, function(item_count) {
  make_norms(
    score = vocab_abbrev_forward$best_sets %>% filter(items_in_scale == item_count) %>% extract2("scores") %>% extract2(1) %>% extract(, 1),
    age = d2$age,
    norm_group = d2$white_only,
    p_value = .05
  )
}) %>% set_names("scale_" + abbrev_scales_items)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.043). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.016). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.002**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = <0.001***). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.002**). Model used.
#add 100 item version
abbrev_scales_norms$scale_100 = norms_100

abbrev_scales_norms %>% 
  write_rds("data/abbrev_scales_norms.rds", compress = "xz")

#move scores into main dataset
for (scale in abbrev_scales_items) {
  d2[["vocab_IQ_" + scale]] = abbrev_scales_norms[["scale_" + scale]]$data$IQ
}
#verify that age norming was done correctly by checking for age effects and white mean/SD
describe2(d2$vocab_IQ_30, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
##   group var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 FALSE ...1    114  97.0   93.9  16.0  15.3  60.3  141. 0.433   -0.215
## 2 TRUE  ...1    327 100     99.0  15    17.0  66.9  149. 0.296   -0.522
#linear effect of age?
GG_scatter(d2, "age", "vocab_IQ_30")
## `geom_smooth()` using formula = 'y ~ x'

#and no heteroscedasticity
test_HS(d2$vocab_IQ_30, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
##   test            r2adj     p fit          log10_p
##   <chr>           <dbl> <dbl> <named list>   <dbl>
## 1 linear raw  -0.000649 0.397 <ols>          0.401
## 2 spline raw  -0.00451  0.723 <ols>          0.141
## 3 linear rank -0.00149  0.556 <ols>          0.255
## 4 spline rank -0.00306  0.506 <ols>          0.296

Plot of reliability by ability level and number of items

#compare reliability plots for full and abbreviated scales
scale_reliabilities = map_df(c(-1, -2, abbrev_scales_items), function(item_count) {
  #-1 = full fit on all good items, -2 = pass rate filtered fit,
  #otherwise get the abbreviated scale with that many items
  if (item_count == -1) {
    fit = all_items_fit
    item_count = good_items_scored %>% select(-id) %>% ncol()
  } else if (item_count == -2) {
    fit = good_items_passrate_filter_fit
    item_count = good_items_passrate_filter %>% ncol()
   } else {
    fit = vocab_abbrev_forward$best_sets %>% filter(items_in_scale == item_count) %>% extract2("fit") %>% extract2(1) %>% extract2("fit")
  }
  
  #get reliabilities
  rels = get_reliabilities(fit) %>% 
    mutate(items_in_scale = item_count)
  
  rels
})

#make a joint plot
scale_reliabilities %>% 
  mutate(
    items_in_scale = as.factor(items_in_scale)
  ) %>%
  ggplot(aes(z, rel, color = items_in_scale)) +
  geom_line() +
  scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
  scale_x_continuous("Ability level (z)") +
  coord_cartesian(xlim = c(-4, 4)) +
  scale_color_discrete("Items in scale")

GG_save("figs/reliability comparison.png")

#overall reliability as function of item count
scale_reliabilities_empirical = bind_rows(
  vocab_abbrev_forward$best_sets %>% 
  select(items_in_scale, reliability),
  
  tibble(
    items_in_scale = good_items_passrate_filter %>% ncol(),
    reliability = empirical_rxx(fscores(good_items_passrate_filter_fit, full.scores.SE = T))
  ),
  
  tibble(
    items_in_scale = good_items_scored %>% select(-id) %>% ncol(),
    reliability = empirical_rxx(fscores(good_items_fit, full.scores.SE = T))
  )
)

scale_reliabilities_empirical %>% 
  print(n = Inf)
## # A tibble: 50 × 2
##    items_in_scale reliability
##             <int>       <dbl>
##  1              3       0.658
##  2              4       0.707
##  3              5       0.733
##  4              6       0.757
##  5              7       0.779
##  6              8       0.795
##  7              9       0.811
##  8             10       0.821
##  9             11       0.831
## 10             12       0.840
## 11             13       0.848
## 12             14       0.856
## 13             15       0.862
## 14             16       0.867
## 15             17       0.873
## 16             18       0.877
## 17             19       0.882
## 18             20       0.886
## 19             21       0.890
## 20             22       0.893
## 21             23       0.896
## 22             24       0.900
## 23             25       0.902
## 24             26       0.904
## 25             27       0.906
## 26             28       0.908
## 27             29       0.910
## 28             30       0.913
## 29             31       0.914
## 30             32       0.916
## 31             33       0.918
## 32             34       0.920
## 33             35       0.921
## 34             36       0.923
## 35             37       0.925
## 36             38       0.926
## 37             39       0.927
## 38             40       0.928
## 39             41       0.929
## 40             42       0.930
## 41             43       0.931
## 42             44       0.932
## 43             45       0.933
## 44             46       0.935
## 45             47       0.935
## 46             48       0.936
## 47             49       0.937
## 48             50       0.937
## 49            100       0.954
## 50            219       0.972
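For context, these empirical reliabilities can be compared with the classical Spearman-Brown prophecy. A quick check, not part of the original analysis:

#Spearman-Brown: predicted reliability when a test is lengthened k-fold
spearman_brown = function(rel, k) (k * rel) / (1 + (k - 1) * rel)

#predict the 50-item reliability from the observed 10-item value (0.821)
spearman_brown(0.821, k = 5)
#about 0.96, somewhat above the observed 0.937, as expected when the shorter
#scale already contains the most discriminating items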
#reliability range as function of item count
scale_reliability_ranges = map_df(scale_reliabilities$items_in_scale %>% unique(), \(x) {
  
  scale_reliabilities %>% 
    filter(items_in_scale == x) %>% 
    reliability_range(min_reliability = c(.80, .85, .90)) %>% 
    mutate(items_in_scale = x)
}) %>% 
  left_join(
    scale_reliabilities_empirical
  ) %>% 
  arrange(items_in_scale)
## Joining with `by = join_by(items_in_scale)`
scale_reliability_ranges %>% 
  print(n = Inf)
## # A tibble: 33 × 6
##    min_reliability lower_z upper_z coverage items_in_scale reliability
##              <dbl>   <dbl>   <dbl>    <dbl>          <dbl>       <dbl>
##  1            0.8   -1.24   0.814     0.684             10       0.821
##  2            0.85  -1.06   0.513     0.550             10       0.821
##  3            0.9   -0.693  0.0905    0.292             10       0.821
##  4            0.8   -1.42   1.18      0.802             15       0.862
##  5            0.85  -1.24   0.874     0.701             15       0.862
##  6            0.9   -0.935  0.392     0.477             15       0.862
##  7            0.8   -1.60   1.42      0.867             20       0.886
##  8            0.85  -1.42   1.12      0.789             20       0.886
##  9            0.9   -1.12   0.693     0.624             20       0.886
## 10            0.8   -1.78   1.54      0.900             25       0.902
## 11            0.85  -1.54   1.24      0.830             25       0.902
## 12            0.9   -1.24   0.814     0.684             25       0.902
## 13            0.8   -1.90   1.72      0.928             30       0.913
## 14            0.85  -1.66   1.42      0.873             30       0.913
## 15            0.9   -1.36   0.995     0.753             30       0.913
## 16            0.8   -2.02   1.84      0.945             35       0.921
## 17            0.85  -1.78   1.54      0.900             35       0.921
## 18            0.9   -1.48   1.12      0.798             35       0.921
## 19            0.8   -2.14   1.96      0.959             40       0.928
## 20            0.85  -1.90   1.66      0.923             40       0.928
## 21            0.9   -1.54   1.24      0.830             40       0.928
## 22            0.8   -2.26   2.14      0.972             45       0.933
## 23            0.85  -2.02   1.78      0.941             45       0.933
## 24            0.9   -1.66   1.30      0.854             45       0.933
## 25            0.8   -2.38   2.20      0.978             50       0.937
## 26            0.85  -2.14   1.84      0.951             50       0.937
## 27            0.9   -1.78   1.36      0.875             50       0.937
## 28            0.8   -3.11   2.98      0.998            100       0.954
## 29            0.85  -2.74   2.56      0.992            100       0.954
## 30            0.9   -2.32   2.02      0.968            100       0.954
## 31            0.8   -4.55   3.41      1.00             219       0.972
## 32            0.85  -4.25   2.92      0.998            219       0.972
## 33            0.9   -3.89   2.32      0.990            219       0.972
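The coverage column is the share of a standard normal population falling between the two z bounds. A spot check for the 30-item scale at a minimum reliability of .90:

#coverage = share of N(0,1) between the z bounds where reliability >= .90
pnorm(0.995) - pnorm(-1.36)
#about 0.75, matching the table above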
#plot reliability coverages by item count and minimum reliability
scale_reliability_ranges %>% 
  mutate(
    z = 1
  ) %>% 
  ggplot(aes(z, factor(items_in_scale), color = factor(min_reliability))) +
  geom_errorbarh(aes(xmin = lower_z, xmax = upper_z), position = "dodge") +
  geom_label(aes(x = 0, label = scales::label_percent(1)(coverage)), position = position_dodge(width = 1), size = 3) +
  scale_color_discrete("Minimum reliability") + 
  scale_y_discrete("Items in scale") +
  scale_x_continuous("Coverage of a normal distribution")

GG_save("figs/reliability coverage.png")

Items in tests

#50 item version
(abbrev_50_names = names(good_items_passrate_filter)[vocab_abbrev_forward$best_sets %>% tail(1) %>% pull(item_set) %>% extract2(1)] %>% gtools::mixedsort())
##  [1] "1of5_1"              "2of5_2"              "2of5_3"              "2of5_4"              "2of5_6"             
##  [6] "2of5_8"              "2of5_9"              "2of5_16"             "2of5_17"             "2of5_18"            
## [11] "2of5_22"             "2of5_25"             "2of5_26"             "2of5_27"             "2of5_31"            
## [16] "3of5_4"              "3of5_6"              "3of5_11"             "3of5_20"             "3of5_25"            
## [21] "3of5_26"             "a_type_of_hat"       "a_type_of_overshoe"  "a_type_of_wavy_form" "auspices"           
## [26] "bow"                 "brief"               "diatribes"           "divergence"          "entanglement"       
## [31] "environment"         "excite"              "greed"               "impromptu"           "lackadaisical"      
## [36] "meal"                "pamper"              "referendum"          "relevant"            "sag"                
## [41] "schemer"             "sensitivity"         "shadows"             "sketch"              "sluggish"           
## [46] "somber"              "sporadic"            "stylish"             "tyrant"              "vulgar"
#100 item version (all good items with proper pass rates)
(abbrev_100_names = names(good_items_passrate_filter) %>% gtools::mixedsort())
##   [1] "1of5_1"                     "1of5_2"                     "1of5_3"                     "1of5_4"                    
##   [5] "2of5_1"                     "2of5_2"                     "2of5_3"                     "2of5_4"                    
##   [9] "2of5_5"                     "2of5_6"                     "2of5_7"                     "2of5_8"                    
##  [13] "2of5_9"                     "2of5_10"                    "2of5_11"                    "2of5_12"                   
##  [17] "2of5_13"                    "2of5_15"                    "2of5_16"                    "2of5_17"                   
##  [21] "2of5_18"                    "2of5_19"                    "2of5_20"                    "2of5_21"                   
##  [25] "2of5_22"                    "2of5_23"                    "2of5_25"                    "2of5_26"                   
##  [29] "2of5_27"                    "2of5_28"                    "2of5_29"                    "2of5_30"                   
##  [33] "2of5_31"                    "2of5_33"                    "2of5_34"                    "2of5_35"                   
##  [37] "3of5_1"                     "3of5_4"                     "3of5_5"                     "3of5_6"                    
##  [41] "3of5_7"                     "3of5_11"                    "3of5_12"                    "3of5_14"                   
##  [45] "3of5_16"                    "3of5_17"                    "3of5_18"                    "3of5_20"                   
##  [49] "3of5_21"                    "3of5_22"                    "3of5_24"                    "3of5_25"                   
##  [53] "3of5_26"                    "3of5_28"                    "3of5_30"                    "3of5_31"                   
##  [57] "3of5_32"                    "a_type_of_hat"              "a_type_of_overshoe"         "a_type_of_smokeless_powder"
##  [61] "a_type_of_sound"            "a_type_of_sword"            "a_type_of_wavy_form"        "auspices"                  
##  [65] "bow"                        "brief"                      "couch"                      "deceit"                    
##  [69] "diatribes"                  "disjoined"                  "divergence"                 "dome"                      
##  [73] "entanglement"               "environment"                "excite"                     "flatteries"                
##  [77] "greed"                      "illness"                    "impromptu"                  "lackadaisical"             
##  [81] "meal"                       "pamper"                     "referendum"                 "relating_to_the_right"     
##  [85] "relevant"                   "sag"                        "schemer"                    "sensitivity"               
##  [89] "shadows"                    "sketch"                     "slave"                      "sluggish"                  
##  [93] "somber"                     "sporadic"                   "stylish"                    "transportation"            
##  [97] "tyrant"                     "unhealthful"                "vulgar"                     "wandering"

Meta

#write main data to file for reuse
d2 %>% write_rds("data/main data.rds", compress = "xz")

#save scored items
good_items_scored %>% write_rds("data/item data.rds", compress = "xz")

#save norms
vocab_norms %>% write_rds("data/vocab norms.rds", compress = "xz")
abbrev_scales_norms %>% write_rds("data/vocab abbrev norms.rds", compress = "xz")


#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/6gcy4/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figs", "vocab.Rmd", "vocab.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}