The code below reuses Emil Kirkegaard’s original code, but applies it to a cleaned sample (i.e., excluding the respondents who failed the attention checks and excluding the items that served as attention checks). It uses the “item data” and “main data” .rds files from my own analysis, available here: https://osf.io/t2j4s/

The “item data” and “main data” .rds files used in Kirkegaard’s main analysis are available here: https://osf.io/6gcy4/

Init

Sys.setenv(LANG = "en") # set the LANG environment variable so R reports its messages in English

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc

## Warning: package 'Hmisc' was built under R version 4.4.3

## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## Loading required package: psych
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Attaching package: 'kirkegaard'
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  mirt,
  rms
)

## Loading required package: stats4
## Loading required package: lattice

# use the black-and-white ggplot2 theme as the default for all later plots
theme_set(theme_bw())

# print numbers with 3 significant digits
options(
  digits = 3
)

Functions

# Replace every infinite entry of x (Inf or -Inf) with NA.
# All other values, including NaN and existing NAs, are returned unchanged.
inf_to_NA = function(x) {
  replace(x, is.infinite(x), NA)
}

# combinations encoding
#
# Turn a set of multi-select indicator columns (one column per option) into a
# single label per row.
#
# x: data frame (or list of equal-length columns) with one column per option;
#    every column is coerced with as.logical().
# Returns an unnamed character vector with one element per row: the names of
# the columns that are TRUE in that row, in column order, joined by ", ".
# A single selected option is returned as-is; no selected option gives "".
# NA cells are treated as "not selected"; previously any NA in a row aborted
# with "missing value where TRUE/FALSE needed".
code_combinations = function(x) {
  # ensure they are logicals
  selected = lapply(x, as.logical)
  option_names = names(selected)
  n_rows = if (length(selected) > 0) length(selected[[1]]) else 0L

  # then go rowwise
  vapply(seq_len(n_rows), function(i) {
    # isTRUE() is FALSE for NA, so a missing answer never selects an option
    picked = vapply(selected, function(col) isTRUE(col[[i]]), logical(1))

    # collapsing a single name returns that name unchanged, so the
    # one-option case needs no special branch
    paste(option_names[picked], collapse = ", ")
  }, character(1))
}

# get mirt stats
#
# Build the "rxx" plot for a fitted model x (the plot object is only captured,
# not printed) and return the points of its first panel as a tibble with
# columns z (the panel's x values) and rel (the panel's y values).
# NOTE(review): z is presumably the latent trait level and rel the reliability
# at that level - confirm against the mirt plot documentation.
get_reliability = function(x) {
  rxx_panel = plot(x, type = "rxx")$panel.args[[1]]

  tibble(z = rxx_panel$x, rel = rxx_panel$y)
}

# g loadings
#
# Read the `F` element stored in the Fit slot of the fitted model x and
# flatten it to a plain vector (dimensions and dimnames are dropped).
get_loadings = function(x) {
  loading_matrix = x@Fit[["F"]]
  as.vector(loading_matrix)
}

Data

#read data from last study
#NOTE: these are absolute paths on the author's machine; point them at your
#own copies of "item data.rds" and "main data.rds" before running.
#itemdata: item responses plus an id column (id is dropped before model fitting)
#d: respondent-level variables (race indicators, native language, ...)
itemdata = read_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\item data.rds")
d = read_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\main data.rds")

Encoding

# Two-group race label: "White" where white_only is TRUE, "Black" where
# black_only is TRUE, and NA for every other respondent.
d = d %>%
  mutate(
    bw = case_when(
      white_only ~ "White",
      black_only ~ "Black",
      .default = NA_character_
    )
  )

#races: count and percent of respondents for each race combination
d$race_combos %>% table2()

## # A tibble: 23 × 3
##    Group                  Count Percent
##    <chr>                  <dbl>   <dbl>
##  1 white                    327  74.1  
##  2 black                     47  10.7  
##  3 east_asian                17   3.85 
##  4 hispanic                  10   2.27 
##  5 white, hispanic            8   1.81 
##  6 south_asian                5   1.13 
##  7 white, jewish              5   1.13 
##  8 southest_asian             3   0.680
##  9 black, native_american     2   0.454
## 10 middle_eastern             2   0.454
## # ℹ 13 more rows

Analysis

Basic

#count and percent of respondents who report English as a native language
d$Is_English_one_of_your_native_languages %>% table2()

## # A tibble: 3 × 3
##   Group Count Percent
##   <chr> <dbl>   <dbl>
## 1 Yes     436   98.9 
## 2 No        5    1.13
## 3 <NA>      0    0

Overall fit

#fit model
#one-factor (model = 1) 2PL IRT model on all item columns; the respondent id
#column is dropped so that only item responses enter the model
good_items_fit = mirt(
  itemdata %>% select(-id),
  model = 1,
  itemtype = "2PL",
  #spell out FALSE: F is an ordinary variable that can be reassigned
  verbose = FALSE,
  #allow up to 2000 EM cycles
  technical = list(NCYCLES = 2000)
)

#print the fit overview (convergence, log-likelihood, information criteria)
good_items_fit

## 
## Call:
## mirt(data = itemdata %>% select(-id), model = 1, itemtype = "2PL", 
##     verbose = F, technical = list(NCYCLES = 2000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 328 EM iterations.
## mirt version: 1.42 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -36583
## Estimated parameters: 438 
## AIC = 74042
## BIC = 75833; SABIC = 74443

#per-item factor loading (F1) and h2, plus SS loadings and proportion of variance
good_items_fit %>% summary()

##                                       F1     h2
## a_specific_number                  0.456 0.2083
## a_type_of_brass_instrument         0.877 0.7699
## a_type_of_drapery                  0.652 0.4257
## a_type_of_fabric_101               0.744 0.5532
## a_type_of_fabric_98                0.700 0.4895
## a_type_of_grasshopper              0.803 0.6452
## a_type_of_hat                      0.574 0.3293
## a_type_of_heating_unit             0.498 0.2478
## a_type_of_magistrate_position      0.746 0.5566
## a_type_of_mathematical_operation   0.569 0.3240
## a_type_of_overshoe                 0.495 0.2455
## a_type_of_plant                    0.734 0.5392
## a_type_of_religious_teacher        0.779 0.6073
## a_type_of_rock                     0.321 0.1028
## a_type_of_skirts                   0.642 0.4126
## a_type_of_smokeless_powder         0.467 0.2178
## a_type_of_sound                    0.490 0.2403
## a_type_of_sword                    0.356 0.1265
## a_type_of_volcanic_crater          0.666 0.4441
## a_type_of_wavy_form                0.706 0.4986
## actionable_negligence              0.769 0.5916
## advisory                           0.723 0.5231
## advocate                           0.702 0.4921
## almighty                           0.914 0.8357
## amazement                          0.661 0.4366
## amenability                        0.740 0.5471
## auspices                           0.688 0.4727
## barely_able_to_read_and_write      0.841 0.7067
## because                            0.680 0.4628
## berate                             0.836 0.6993
## blade                              0.783 0.6130
## blend                              0.683 0.4659
## blunder                            0.860 0.7401
## bow                                0.751 0.5647
## brief                              0.432 0.1866
## bring_about                        0.790 0.6240
## carelessly_or_hastily_put_together 0.609 0.3711
## celebration                        0.719 0.5167
## cheat                              0.806 0.6497
## clay_pigeon_shooting               0.767 0.5885
## coarse                             0.776 0.6028
## collect_or_study_insects           0.626 0.3922
## colossal                           0.713 0.5078
## commotion                          0.903 0.8150
## complainer                         0.795 0.6323
## confiscate                         0.909 0.8268
## congratulate                       0.591 0.3491
## contemplate                        0.468 0.2186
## convoy                             0.855 0.7303
## couch                              0.651 0.4239
## crease                             0.762 0.5811
## cunning                            0.639 0.4085
## deceit                             0.575 0.3301
## deliberately                       0.761 0.5790
## deprive                            0.636 0.4043
## detectable                         0.338 0.1142
## diatribes                          0.579 0.3350
## disjoined                          0.311 0.0970
## disrupt                            0.791 0.6253
## distinct                           0.275 0.0756
## divergence                         0.796 0.6334
## dome                               0.552 0.3046
## downfall                           0.775 0.6010
## drink                              0.851 0.7244
## elite                              0.700 0.4896
## embarrassment                      0.602 0.3624
## emphasize                          0.881 0.7753
## empire                             0.505 0.2554
## empty                              0.759 0.5763
## entanglement                       0.691 0.4775
## environment                        0.659 0.4343
## excite                             0.775 0.6010
## flammable                          0.920 0.8466
## flatterer                          0.868 0.7527
## flatteries                         0.646 0.4178
## forever                            0.799 0.6379
## gigantic                           0.763 0.5826
## girl                               0.779 0.6066
## goo                                0.717 0.5147
## goodbye                            0.820 0.6732
## greed                              0.841 0.7073
## groan                              0.851 0.7244
## gruesome                           0.666 0.4434
## guarantee                          0.283 0.0801
## gutter                             0.658 0.4326
## harmfulness                        0.523 0.2739
## hobby                              0.806 0.6502
## hut                                0.811 0.6582
## illness                            0.491 0.2413
## impromptu                          0.660 0.4359
## indescribable                      0.762 0.5802
## intellectual                       0.711 0.5054
## jargon                             0.926 0.8573
## knowledgeable                      0.817 0.6681
## lackadaisical                      0.606 0.3676
## manager                            0.868 0.7529
## meal                               0.792 0.6277
## melodic                            0.729 0.5313
## mutually                           0.873 0.7630
## nonsense                           0.861 0.7410
## nonsensical                        0.872 0.7600
## not_coveted                        0.664 0.4407
## pamper                             0.590 0.3487
## penitentiary                       0.528 0.2784
## perplexing                         0.691 0.4768
## persistence                        0.638 0.4075
## predetermine                       0.542 0.2940
## pretender                          0.857 0.7348
## questioning                        0.662 0.4380
## quickly                            0.657 0.4321
## rebellious                         0.803 0.6452
## referee                            0.772 0.5961
## referendum                         0.525 0.2760
## relating_to_the_right              0.350 0.1227
## relevant                           0.699 0.4890
## respectful                         0.771 0.5937
## retailer                           0.601 0.3612
## retract                            0.524 0.2751
## ropes                              0.799 0.6381
## sag                                0.596 0.3548
## schemer                            0.669 0.4478
## seize                              0.793 0.6295
## sensitivity                        0.717 0.5145
## shadows                            0.658 0.4333
## sketch                             0.655 0.4284
## slang                              0.803 0.6442
## slave                              0.481 0.2313
## sluggish                           0.634 0.4020
## somber                             0.606 0.3669
## spinelessness                      0.632 0.4000
## sporadic                           0.832 0.6916
## squad                              0.975 0.9509
## stagger                            0.652 0.4256
## stinking                           0.877 0.7686
## stroll                             0.872 0.7606
## stubborn_100                       0.552 0.3050
## stubborn_143                       0.654 0.4282
## stylish                            0.693 0.4796
## summit                             0.756 0.5713
## terminology                        0.830 0.6882
## the_science_of_speech_sounds       0.751 0.5646
## transportation                     0.298 0.0890
## tyrant                             0.439 0.1932
## unhealthful                        0.419 0.1756
## vile                               0.881 0.7770
## vulgar                             0.720 0.5181
## wandering                          0.415 0.1724
## warning                            0.637 0.4063
## wave                               0.721 0.5199
## weaponry                           0.676 0.4571
## 1of5_1                             0.440 0.1938
## 1of5_2                             0.289 0.0838
## 1of5_3                             0.378 0.1431
## 1of5_4                             0.311 0.0969
## 2of5_1                             0.411 0.1685
## 2of5_2                             0.536 0.2872
## 2of5_3                             0.581 0.3376
## 2of5_4                             0.619 0.3830
## 2of5_5                             0.521 0.2714
## 2of5_6                             0.736 0.5414
## 2of5_7                             0.375 0.1406
## 2of5_8                             0.585 0.3427
## 2of5_9                             0.554 0.3074
## 2of5_10                            0.378 0.1428
## 2of5_11                            0.371 0.1373
## 2of5_12                            0.418 0.1749
## 2of5_13                            0.491 0.2410
## 2of5_14                            0.402 0.1612
## 2of5_15                            0.677 0.4578
## 2of5_16                            0.574 0.3299
## 2of5_17                            0.679 0.4617
## 2of5_18                            0.735 0.5400
## 2of5_19                            0.447 0.1997
## 2of5_20                            0.384 0.1477
## 2of5_21                            0.514 0.2646
## 2of5_22                            0.881 0.7754
## 2of5_23                            0.310 0.0964
## 2of5_25                            0.734 0.5394
## 2of5_26                            0.711 0.5058
## 2of5_27                            0.582 0.3385
## 2of5_28                            0.275 0.0755
## 2of5_29                            0.548 0.3007
## 2of5_30                            0.552 0.3047
## 2of5_31                            0.538 0.2900
## 2of5_32                            0.627 0.3926
## 2of5_33                            0.333 0.1111
## 2of5_34                            0.453 0.2055
## 2of5_35                            0.295 0.0871
## 2of5_36                            0.764 0.5831
## 2of5_37                            0.856 0.7335
## 3of5_1                             0.271 0.0732
## 3of5_2                             0.494 0.2441
## 3of5_3                             0.476 0.2263
## 3of5_4                             0.707 0.4992
## 3of5_5                             0.329 0.1082
## 3of5_6                             0.750 0.5630
## 3of5_7                             0.594 0.3526
## 3of5_8                             0.411 0.1690
## 3of5_10                            0.850 0.7228
## 3of5_11                            0.723 0.5220
## 3of5_12                            0.546 0.2980
## 3of5_13                            0.365 0.1330
## 3of5_14                            0.508 0.2577
## 3of5_16                            0.326 0.1065
## 3of5_17                            0.386 0.1486
## 3of5_18                            0.319 0.1018
## 3of5_20                            0.553 0.3056
## 3of5_21                            0.422 0.1781
## 3of5_22                            0.443 0.1960
## 3of5_23                            0.860 0.7398
## 3of5_24                            0.549 0.3018
## 3of5_25                            0.526 0.2766
## 3of5_26                            0.709 0.5033
## 3of5_27                            0.596 0.3551
## 3of5_28                            0.413 0.1709
## 3of5_29                            0.541 0.2926
## 3of5_30                            0.510 0.2606
## 3of5_31                            0.426 0.1812
## 3of5_32                            0.576 0.3316
## 
## SS loadings:  94.3 
## Proportion Var:  0.431 
## 
## Factor correlations: 
## 
##    F1
## F1  1

#classical item statistics computed on the same item columns the model was fitted to
good_items_stats = itemstats(itemdata %>% select(-id))

#append the model-based estimates; both come from the same item columns, so
#rows and estimates share the same item order
good_items_stats$itemstats$loading = good_items_fit %>% get_loadings()

#extract the item parameter matrix once instead of calling coef() per column
good_items_pars = coef(good_items_fit, simplify = TRUE)$items

#column 1 of the parameter matrix is stored as discrimination
good_items_stats$itemstats$discrim = unname(good_items_pars[, 1])

#column 2 of the parameter matrix is negated and stored as difficulty
#NOTE(review): for a 2PL mirt fit columns 1 and 2 are presumably the slope (a1)
#and intercept (d); if so, "difficulty" here is -d, not the classical
#b = -d / a1 - confirm this is the intended scale
good_items_stats$itemstats$difficulty = unname(-good_items_pars[, 2])
good_items_stats$itemstats

##                                      N  mean    sd total.r total.r_if_rm
## a_specific_number                  441 0.841 0.366   0.257         0.247
## a_type_of_brass_instrument         441 0.932 0.252   0.387         0.381
## a_type_of_drapery                  441 0.823 0.382   0.389         0.380
## a_type_of_fabric_101               441 0.934 0.248   0.320         0.314
## a_type_of_fabric_98                441 0.755 0.431   0.479         0.470
## a_type_of_grasshopper              441 0.821 0.384   0.510         0.502
## a_type_of_hat                      441 0.356 0.479   0.468         0.456
## a_type_of_heating_unit             441 0.871 0.336   0.248         0.239
## a_type_of_magistrate_position      441 0.898 0.303   0.357         0.350
## a_type_of_mathematical_operation   441 0.921 0.271   0.263         0.255
## a_type_of_overshoe                 441 0.635 0.482   0.396         0.384
## a_type_of_plant                    441 0.862 0.346   0.422         0.414
## a_type_of_religious_teacher        441 0.855 0.353   0.462         0.454
## a_type_of_rock                     441 0.948 0.223   0.100         0.094
## a_type_of_skirts                   441 0.828 0.378   0.386         0.377
## a_type_of_smokeless_powder         441 0.753 0.432   0.301         0.290
## a_type_of_sound                    441 0.680 0.467   0.364         0.353
## a_type_of_sword                    441 0.599 0.491   0.258         0.245
## a_type_of_volcanic_crater          441 0.855 0.353   0.380         0.371
## a_type_of_wavy_form                441 0.615 0.487   0.549         0.539
## actionable_negligence              441 0.841 0.366   0.449         0.440
## advisory                           441 0.959 0.198   0.242         0.237
## advocate                           441 0.751 0.433   0.502         0.492
## almighty                           441 0.961 0.193   0.354         0.349
## amazement                          441 0.918 0.274   0.305         0.298
## amenability                        441 0.832 0.374   0.422         0.413
## auspices                           441 0.494 0.501   0.558         0.548
## barely_able_to_read_and_write      441 0.880 0.326   0.473         0.466
## because                            441 0.912 0.284   0.284         0.277
## berate                             441 0.857 0.350   0.451         0.443
## blade                              441 0.939 0.240   0.342         0.336
## blend                              441 0.912 0.284   0.329         0.321
## blunder                            441 0.961 0.193   0.230         0.225
## bow                                441 0.596 0.491   0.595         0.586
## brief                              441 0.571 0.495   0.429         0.417
## bring_about                        441 0.868 0.338   0.438         0.430
## carelessly_or_hastily_put_together 441 0.866 0.341   0.312         0.304
## celebration                        441 0.757 0.429   0.477         0.467
## cheat                              441 0.723 0.448   0.577         0.569
## clay_pigeon_shooting               441 0.943 0.232   0.292         0.286
## coarse                             441 0.941 0.236   0.340         0.333
## collect_or_study_insects           441 0.932 0.252   0.272         0.265
## colossal                           441 0.823 0.382   0.419         0.410
## commotion                          441 0.853 0.355   0.563         0.556
## complainer                         441 0.839 0.368   0.483         0.475
## confiscate                         441 0.896 0.306   0.483         0.476
## congratulate                       441 0.887 0.317   0.294         0.285
## contemplate                        441 0.902 0.297   0.242         0.234
## convoy                             441 0.764 0.425   0.594         0.586
## couch                              441 0.757 0.429   0.434         0.424
## crease                             441 0.830 0.376   0.470         0.461
## cunning                            441 0.796 0.403   0.369         0.359
## deceit                             441 0.746 0.436   0.391         0.380
## deliberately                       441 0.812 0.391   0.479         0.471
## deprive                            441 0.837 0.370   0.386         0.377
## detectable                         441 0.966 0.181   0.103         0.097
## diatribes                          441 0.624 0.485   0.453         0.441
## disjoined                          441 0.494 0.501   0.272         0.259
## disrupt                            441 0.959 0.198   0.274         0.269
## distinct                           441 0.857 0.350   0.186         0.176
## divergence                         441 0.698 0.459   0.591         0.582
## dome                               441 0.744 0.437   0.393         0.383
## downfall                           441 0.898 0.303   0.407         0.400
## drink                              441 0.769 0.422   0.558         0.550
## elite                              441 0.853 0.355   0.384         0.376
## embarrassment                      441 0.875 0.331   0.313         0.304
## emphasize                          441 0.966 0.181   0.286         0.282
## empire                             441 0.816 0.388   0.317         0.307
## empty                              441 0.848 0.359   0.448         0.439
## entanglement                       441 0.542 0.499   0.539         0.528
## environment                        441 0.503 0.501   0.541         0.531
## excite                             441 0.707 0.455   0.581         0.572
## flammable                          441 0.986 0.116   0.223         0.220
## flatterer                          441 0.844 0.364   0.558         0.551
## flatteries                         441 0.732 0.443   0.482         0.472
## forever                            441 0.864 0.343   0.453         0.445
## gigantic                           441 0.952 0.213   0.242         0.237
## girl                               441 0.875 0.331   0.418         0.410
## goo                                441 0.971 0.169   0.219         0.215
## goodbye                            441 0.948 0.223   0.326         0.320
## greed                              441 0.649 0.478   0.668         0.660
## groan                              441 0.912 0.284   0.391         0.384
## gruesome                           441 0.834 0.372   0.392         0.383
## guarantee                          441 0.930 0.256   0.108         0.100
## gutter                             441 0.941 0.236   0.223         0.217
## harmfulness                        441 0.868 0.338   0.312         0.303
## hobby                              441 0.880 0.326   0.443         0.436
## hut                                441 0.889 0.315   0.454         0.446
## illness                            441 0.717 0.451   0.334         0.323
## impromptu                          441 0.626 0.484   0.513         0.503
## indescribable                      441 0.955 0.208   0.291         0.286
## intellectual                       441 0.943 0.232   0.296         0.290
## jargon                             441 0.959 0.198   0.346         0.341
## knowledgeable                      441 0.823 0.382   0.513         0.505
## lackadaisical                      441 0.741 0.438   0.416         0.405
## manager                            441 0.986 0.116   0.192         0.189
## meal                               441 0.610 0.488   0.627         0.619
## melodic                            441 0.873 0.333   0.422         0.414
## mutually                           441 0.966 0.181   0.310         0.305
## nonsense                           441 0.873 0.333   0.493         0.486
## nonsensical                        441 0.973 0.163   0.306         0.301
## not_coveted                        441 0.853 0.355   0.371         0.362
## pamper                             441 0.467 0.499   0.452         0.441
## penitentiary                       441 0.918 0.274   0.241         0.234
## perplexing                         441 0.905 0.294   0.376         0.369
## persistence                        441 0.862 0.346   0.350         0.341
## predetermine                       441 0.916 0.278   0.256         0.249
## pretender                          441 0.991 0.095   0.139         0.136
## questioning                        441 0.900 0.300   0.343         0.335
## quickly                            441 0.789 0.408   0.400         0.391
## rebellious                         441 0.832 0.374   0.488         0.480
## referee                            441 0.898 0.303   0.398         0.390
## referendum                         441 0.397 0.490   0.440         0.428
## relating_to_the_right              441 0.698 0.459   0.290         0.277
## relevant                           441 0.705 0.456   0.482         0.472
## respectful                         441 0.859 0.348   0.422         0.414
## retailer                           441 0.798 0.402   0.393         0.383
## retract                            441 0.875 0.331   0.281         0.272
## ropes                              441 0.857 0.350   0.417         0.409
## sag                                441 0.719 0.450   0.460         0.449
## schemer                            441 0.585 0.493   0.525         0.515
## seize                              441 0.864 0.343   0.442         0.434
## sensitivity                        441 0.728 0.446   0.511         0.502
## shadows                            441 0.685 0.465   0.492         0.481
## sketch                             441 0.721 0.449   0.474         0.464
## slang                              441 0.821 0.384   0.481         0.473
## slave                              441 0.776 0.418   0.361         0.350
## sluggish                           441 0.746 0.436   0.479         0.469
## somber                             441 0.662 0.474   0.488         0.478
## spinelessness                      441 0.830 0.376   0.397         0.388
## sporadic                           441 0.658 0.475   0.668         0.660
## squad                              441 0.993 0.082   0.207         0.205
## stagger                            441 0.769 0.422   0.441         0.431
## stinking                           441 0.862 0.346   0.500         0.492
## stroll                             441 0.698 0.459   0.670         0.663
## stubborn_100                       441 0.864 0.343   0.282         0.273
## stubborn_143                       441 0.871 0.336   0.404         0.396
## stylish                            441 0.469 0.500   0.560         0.549
## summit                             441 0.859 0.348   0.437         0.429
## terminology                        441 0.739 0.440   0.588         0.580
## the_science_of_speech_sounds       441 0.964 0.187   0.218         0.213
## transportation                     441 0.660 0.474   0.217         0.204
## tyrant                             441 0.333 0.472   0.332         0.319
## unhealthful                        441 0.435 0.496   0.329         0.316
## vile                               441 0.948 0.223   0.358         0.352
## vulgar                             441 0.610 0.488   0.533         0.522
## wandering                          441 0.565 0.496   0.328         0.315
## warning                            441 0.893 0.309   0.359         0.351
## wave                               441 0.821 0.384   0.453         0.444
## weaponry                           441 0.834 0.372   0.405         0.396
## 1of5_1                             383 0.305 0.461   0.331         0.319
## 1of5_2                             383 0.303 0.460   0.222         0.209
## 1of5_3                             383 0.245 0.431   0.237         0.225
## 1of5_4                             383 0.431 0.496   0.255         0.242
## 2of5_1                             383 0.475 0.500   0.345         0.332
## 2of5_2                             383 0.269 0.444   0.416         0.406
## 2of5_3                             383 0.465 0.499   0.487         0.476
## 2of5_4                             383 0.266 0.443   0.432         0.422
## 2of5_5                             383 0.554 0.498   0.426         0.414
## 2of5_6                             383 0.713 0.453   0.524         0.515
## 2of5_7                             383 0.313 0.464   0.248         0.235
## 2of5_8                             383 0.245 0.431   0.406         0.395
## 2of5_9                             383 0.436 0.497   0.435         0.423
## 2of5_10                            383 0.423 0.495   0.324         0.311
## 2of5_11                            383 0.527 0.500   0.306         0.292
## 2of5_12                            383 0.232 0.423   0.315         0.304
## 2of5_13                            383 0.326 0.469   0.389         0.377
## 2of5_14                            383 0.136 0.343   0.244         0.235
## 2of5_15                            383 0.721 0.449   0.493         0.483
## 2of5_16                            383 0.493 0.501   0.469         0.457
## 2of5_17                            383 0.569 0.496   0.572         0.562
## 2of5_18                            383 0.402 0.491   0.582         0.573
## 2of5_19                            383 0.522 0.500   0.370         0.357
## 2of5_20                            383 0.559 0.497   0.305         0.291
## 2of5_21                            383 0.266 0.443   0.425         0.414
## 2of5_22                            383 0.567 0.496   0.725         0.718
## 2of5_23                            383 0.282 0.451   0.257         0.245
## 2of5_25                            383 0.543 0.499   0.599         0.589
## 2of5_26                            383 0.467 0.500   0.573         0.563
## 2of5_27                            383 0.256 0.437   0.443         0.432
## 2of5_28                            383 0.554 0.498   0.256         0.242
## 2of5_29                            383 0.420 0.494   0.457         0.445
## 2of5_30                            383 0.580 0.494   0.443         0.431
## 2of5_31                            383 0.298 0.458   0.399         0.387
## 2of5_32                            383 0.117 0.322   0.334         0.326
## 2of5_33                            383 0.272 0.445   0.250         0.238
## 2of5_34                            383 0.509 0.501   0.380         0.368
## 2of5_35                            383 0.433 0.496   0.241         0.227
## 2of5_36                            383 0.990 0.102   0.151         0.148
## 2of5_37                            383 0.997 0.051   0.103         0.101
## 3of5_1                             383 0.407 0.492   0.234         0.220
## 3of5_2                             383 0.157 0.364   0.291         0.281
## 3of5_3                             383 0.185 0.389   0.323         0.313
## 3of5_4                             383 0.480 0.500   0.576         0.566
## 3of5_5                             383 0.394 0.489   0.310         0.297
## 3of5_6                             383 0.475 0.500   0.599         0.590
## 3of5_7                             383 0.230 0.421   0.444         0.434
## 3of5_8                             383 0.099 0.299   0.204         0.196
## 3of5_10                            383 0.794 0.405   0.578         0.570
## 3of5_11                            383 0.311 0.463   0.538         0.529
## 3of5_12                            383 0.499 0.501   0.460         0.449
## 3of5_13                            383 0.180 0.385   0.226         0.215
## 3of5_14                            383 0.587 0.493   0.410         0.398
## 3of5_16                            383 0.467 0.500   0.278         0.264
## 3of5_17                            383 0.368 0.483   0.325         0.312
## 3of5_18                            383 0.428 0.495   0.273         0.259
## 3of5_20                            383 0.339 0.474   0.440         0.429
## 3of5_21                            383 0.601 0.490   0.344         0.331
## 3of5_22                            383 0.540 0.499   0.405         0.393
## 3of5_23                            383 0.898 0.303   0.462         0.455
## 3of5_24                            383 0.637 0.481   0.457         0.446
## 3of5_25                            383 0.379 0.486   0.416         0.404
## 3of5_26                            383 0.373 0.484   0.564         0.554
## 3of5_27                            383 0.178 0.383   0.379         0.369
## 3of5_28                            383 0.815 0.389   0.275         0.264
## 3of5_29                            383 0.825 0.380   0.347         0.337
## 3of5_30                            383 0.433 0.496   0.423         0.411
## 3of5_31                            383 0.702 0.458   0.329         0.317
## 3of5_32                            383 0.705 0.457   0.436         0.425
##                                    alpha_if_rm loading discrim difficulty
## a_specific_number                        0.975   0.456   0.873     -1.901
## a_type_of_brass_instrument               0.975   0.877   3.113     -4.945
## a_type_of_drapery                        0.975   0.652   1.465     -2.098
## a_type_of_fabric_101                     0.975   0.744   1.894     -3.742
## a_type_of_fabric_98                      0.975   0.700   1.667     -1.707
## a_type_of_grasshopper                    0.975   0.803   2.295     -2.683
## a_type_of_hat                            0.975   0.574   1.192      0.723
## a_type_of_heating_unit                   0.975   0.498   0.977     -2.215
## a_type_of_magistrate_position            0.975   0.746   1.907     -3.199
## a_type_of_mathematical_operation         0.975   0.569   1.178     -2.927
## a_type_of_overshoe                       0.975   0.495   0.971     -0.690
## a_type_of_plant                          0.975   0.734   1.841     -2.726
## a_type_of_religious_teacher              0.975   0.779   2.117     -2.878
## a_type_of_rock                           0.975   0.321   0.576     -3.036
## a_type_of_skirts                         0.975   0.642   1.426     -2.111
## a_type_of_smokeless_powder               0.975   0.467   0.898     -1.306
## a_type_of_sound                          0.975   0.490   0.957     -0.921
## a_type_of_sword                          0.975   0.356   0.648     -0.449
## a_type_of_volcanic_crater                0.975   0.666   1.521     -2.417
## a_type_of_wavy_form                      0.975   0.706   1.697     -0.806
## actionable_negligence                    0.975   0.769   2.048     -2.685
## advisory                                 0.975   0.723   1.783     -4.193
## advocate                                 0.975   0.702   1.675     -1.680
## almighty                                 0.975   0.914   3.839     -6.568
## amazement                                0.975   0.661   1.498     -3.137
## amenability                              0.975   0.740   1.871     -2.462
## auspices                                 0.975   0.688   1.611     -0.062
## barely_able_to_read_and_write            0.975   0.841   2.642     -3.630
## because                                  0.975   0.680   1.580     -3.105
## berate                                   0.975   0.836   2.596     -3.314
## blade                                    0.975   0.783   2.142     -4.068
## blend                                    0.975   0.683   1.590     -3.114
## blunder                                  0.975   0.860   2.872     -5.416
## bow                                      0.975   0.751   1.938     -0.774
## brief                                    0.975   0.432   0.815     -0.350
## bring_about                              0.975   0.790   2.192     -3.090
## carelessly_or_hastily_put_together       0.975   0.609   1.308     -2.377
## celebration                              0.975   0.719   1.760     -1.779
## cheat                                    0.975   0.806   2.318     -1.864
## clay_pigeon_shooting                     0.975   0.767   2.035     -4.057
## coarse                                   0.975   0.776   2.097     -4.069
## collect_or_study_insects                 0.975   0.626   1.367     -3.247
## colossal                                 0.975   0.713   1.729     -2.278
## commotion                                0.975   0.903   3.572     -4.156
## complainer                               0.975   0.795   2.232     -2.810
## confiscate                               0.975   0.909   3.718     -4.932
## congratulate                             0.975   0.591   1.246     -2.546
## contemplate                              0.975   0.468   0.900     -2.509
## convoy                                   0.975   0.855   2.801     -2.538
## couch                                    0.975   0.651   1.460     -1.605
## crease                                   0.975   0.762   2.005     -2.542
## cunning                                  0.975   0.639   1.414     -1.854
## deceit                                   0.975   0.575   1.195     -1.395
## deliberately                             0.975   0.761   1.996     -2.372
## deprive                                  0.975   0.636   1.402     -2.171
## detectable                               0.975   0.338   0.611     -3.504
## diatribes                                0.975   0.579   1.208     -0.699
## disjoined                                0.975   0.311   0.558      0.016
## disrupt                                  0.975   0.791   2.198     -4.609
## distinct                                 0.975   0.275   0.487     -1.873
## divergence                               0.975   0.796   2.237     -1.624
## dome                                     0.975   0.552   1.126     -1.349
## downfall                                 0.975   0.775   2.089     -3.359
## drink                                    0.975   0.851   2.759     -2.550
## elite                                    0.975   0.700   1.667     -2.501
## embarrassment                            0.975   0.602   1.283     -2.451
## emphasize                                0.975   0.881   3.161     -5.909
## empire                                   0.975   0.505   0.997     -1.771
## empty                                    0.975   0.759   1.985     -2.702
## entanglement                             0.975   0.691   1.627     -0.349
## environment                              0.975   0.659   1.491     -0.101
## excite                                   0.975   0.775   2.089     -1.608
## flammable                                0.975   0.920   3.999     -8.102
## flatterer                                0.975   0.868   2.969     -3.489
## flatteries                               0.975   0.646   1.442     -1.430
## forever                                  0.975   0.799   2.259     -3.096
## gigantic                                 0.975   0.763   2.011     -4.239
## girl                                     0.975   0.779   2.113     -3.100
## goo                                      0.975   0.717   1.753     -4.524
## goodbye                                  0.975   0.820   2.443     -4.572
## greed                                    0.975   0.841   2.646     -1.450
## groan                                    0.975   0.851   2.760     -4.202
## gruesome                                 0.975   0.666   1.519     -2.230
## guarantee                                0.975   0.283   0.502     -2.683
## gutter                                   0.975   0.658   1.486     -3.509
## harmfulness                              0.975   0.523   1.045     -2.233
## hobby                                    0.975   0.806   2.321     -3.336
## hut                                      0.975   0.811   2.362     -3.488
## illness                                  0.975   0.491   0.960     -1.120
## impromptu                                0.975   0.660   1.496     -0.804
## indescribable                            0.975   0.762   2.001     -4.286
## intellectual                             0.975   0.711   1.720     -3.759
## jargon                                   0.975   0.926   4.172     -6.900
## knowledgeable                            0.975   0.817   2.415     -2.800
## lackadaisical                            0.975   0.606   1.298     -1.416
## manager                                  0.975   0.868   2.971     -6.748
## meal                                     0.975   0.792   2.210     -0.965
## melodic                                  0.975   0.729   1.812     -2.823
## mutually                                 0.975   0.873   3.054     -5.781
## nonsense                                 0.975   0.861   2.879     -3.766
## nonsensical                              0.975   0.872   3.029     -6.021
## not_coveted                              0.975   0.664   1.511     -2.388
## pamper                                   0.975   0.590   1.245      0.118
## penitentiary                             0.975   0.528   1.057     -2.811
## perplexing                               0.975   0.691   1.625     -3.050
## persistence                              0.975   0.638   1.412     -2.405
## predetermine                             0.975   0.542   1.098     -2.807
## pretender                                0.975   0.857   2.833     -7.019
## questioning                              0.975   0.662   1.502     -2.894
## quickly                                  0.975   0.657   1.485     -1.845
## rebellious                               0.975   0.803   2.295     -2.793
## referee                                  0.975   0.772   2.068     -3.340
## referendum                               0.975   0.525   1.051      0.482
## relating_to_the_right                    0.975   0.350   0.637     -0.922
## relevant                                 0.975   0.699   1.665     -1.366
## respectful                               0.975   0.771   2.058     -2.877
## retailer                                 0.975   0.601   1.280     -1.792
## retract                                  0.975   0.524   1.048     -2.301
## ropes                                    0.975   0.799   2.260     -3.022
## sag                                      0.975   0.596   1.262     -1.258
## schemer                                  0.975   0.669   1.533     -0.578
## seize                                    0.975   0.793   2.219     -3.061
## sensitivity                              0.975   0.717   1.752     -1.565
## shadows                                  0.975   0.658   1.488     -1.153
## sketch                                   0.975   0.655   1.473     -1.373
## slang                                    0.975   0.803   2.290     -2.679
## slave                                    0.975   0.481   0.934     -1.462
## sluggish                                 0.975   0.634   1.395     -1.495
## somber                                   0.975   0.606   1.296     -0.940
## spinelessness                            0.975   0.632   1.390     -2.106
## sporadic                                 0.975   0.832   2.549     -1.474
## squad                                    0.975   0.975   7.487    -14.285
## stagger                                  0.975   0.652   1.465     -1.687
## stinking                                 0.975   0.877   3.102     -3.831
## stroll                                   0.975   0.872   3.034     -2.092
## stubborn_100                             0.975   0.552   1.128     -2.239
## stubborn_143                             0.975   0.654   1.473     -2.539
## stylish                                  0.975   0.693   1.634      0.086
## summit                                   0.975   0.756   1.965     -2.801
## terminology                              0.975   0.830   2.528     -2.128
## the_science_of_speech_sounds             0.975   0.751   1.938     -4.477
## transportation                           0.975   0.298   0.532     -0.711
## tyrant                                   0.975   0.439   0.833      0.781
## unhealthful                              0.975   0.419   0.785      0.278
## vile                                     0.975   0.881   3.177     -5.378
## vulgar                                   0.975   0.720   1.765     -0.802
## wandering                                0.975   0.415   0.777     -0.313
## warning                                  0.975   0.637   1.408     -2.739
## wave                                     0.975   0.721   1.771     -2.288
## weaponry                                 0.975   0.676   1.562     -2.259
## 1of5_1                                   0.975   0.440   0.834      0.977
## 1of5_2                                   0.975   0.289   0.515      0.908
## 1of5_3                                   0.975   0.378   0.695      1.271
## 1of5_4                                   0.975   0.311   0.557      0.322
## 2of5_1                                   0.975   0.411   0.766      0.139
## 2of5_2                                   0.975   0.536   1.080      1.274
## 2of5_3                                   0.975   0.581   1.215      0.204
## 2of5_4                                   0.975   0.619   1.341      1.403
## 2of5_5                                   0.975   0.521   1.039     -0.239
## 2of5_6                                   0.975   0.736   1.849     -1.406
## 2of5_7                                   0.975   0.375   0.688      0.896
## 2of5_8                                   0.975   0.585   1.229      1.496
## 2of5_9                                   0.975   0.554   1.134      0.350
## 2of5_10                                  0.975   0.378   0.695      0.371
## 2of5_11                                  0.975   0.371   0.679     -0.096
## 2of5_12                                  0.975   0.418   0.784      1.384
## 2of5_13                                  0.975   0.491   0.959      0.898
## 2of5_14                                  0.975   0.402   0.746      2.089
## 2of5_15                                  0.975   0.677   1.564     -1.325
## 2of5_16                                  0.975   0.574   1.194      0.054
## 2of5_17                                  0.975   0.679   1.576     -0.402
## 2of5_18                                  0.975   0.735   1.844      0.638
## 2of5_19                                  0.975   0.447   0.850     -0.077
## 2of5_20                                  0.975   0.384   0.709     -0.237
## 2of5_21                                  0.975   0.514   1.021      1.267
## 2of5_22                                  0.975   0.881   3.162     -0.750
## 2of5_23                                  0.975   0.310   0.556      1.025
## 2of5_25                                  0.975   0.734   1.842     -0.288
## 2of5_26                                  0.975   0.711   1.722      0.200
## 2of5_27                                  0.975   0.582   1.218      1.418
## 2of5_28                                  0.975   0.275   0.486     -0.205
## 2of5_29                                  0.975   0.548   1.116      0.429
## 2of5_30                                  0.975   0.552   1.127     -0.382
## 2of5_31                                  0.975   0.538   1.088      1.101
## 2of5_32                                  0.975   0.627   1.368      2.736
## 2of5_33                                  0.975   0.333   0.602      1.093
## 2of5_34                                  0.975   0.453   0.866     -0.017
## 2of5_35                                  0.975   0.295   0.526      0.308
## 2of5_36                                  0.975   0.764   2.013     -5.815
## 2of5_37                                  0.975   0.856   2.824     -8.239
## 3of5_1                                   0.975   0.271   0.478      0.417
## 3of5_2                                   0.975   0.494   0.967      2.033
## 3of5_3                                   0.975   0.476   0.921      1.772
## 3of5_4                                   0.975   0.707   1.699      0.119
## 3of5_5                                   0.975   0.329   0.593      0.489
## 3of5_6                                   0.975   0.750   1.932      0.149
## 3of5_7                                   0.975   0.594   1.256      1.621
## 3of5_8                                   0.975   0.411   0.767      2.484
## 3of5_10                                  0.975   0.850   2.749     -2.626
## 3of5_11                                  0.975   0.723   1.779      1.270
## 3of5_12                                  0.975   0.546   1.109      0.029
## 3of5_13                                  0.975   0.365   0.667      1.686
## 3of5_14                                  0.975   0.508   1.003     -0.403
## 3of5_16                                  0.975   0.326   0.587      0.165
## 3of5_17                                  0.975   0.386   0.711      0.629
## 3of5_18                                  0.975   0.319   0.573      0.335
## 3of5_20                                  0.975   0.553   1.129      0.871
## 3of5_21                                  0.975   0.422   0.792     -0.437
## 3of5_22                                  0.975   0.443   0.840     -0.161
## 3of5_23                                  0.975   0.860   2.870     -3.963
## 3of5_24                                  0.975   0.549   1.119     -0.679
## 3of5_25                                  0.975   0.526   1.052      0.639
## 3of5_26                                  0.975   0.709   1.713      0.807
## 3of5_27                                  0.975   0.596   1.263      2.041
## 3of5_28                                  0.975   0.413   0.773     -1.612
## 3of5_29                                  0.975   0.541   1.095     -1.829
## 3of5_30                                  0.975   0.510   1.010      0.352
## 3of5_31                                  0.975   0.426   0.801     -0.943
## 3of5_32                                  0.975   0.576   1.199     -1.079

# Distribution of each item statistic across items: one summary row per
# column of itemstats (N, mean, sd, loading, discrimination, difficulty, ...).
good_items_stats$itemstats %>% 
  describe2()

## # A tibble: 9 × 10
##   var        n    mean  median      sd     mad      min     max    skew kurtosis
##   <chr>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>   <dbl>    <dbl>
## 1 N        219 423.    441     2.70e+1 0       383      441     -0.791   -1.38  
## 2 mean     219   0.692   0.757 2.32e-1 2.29e-1   0.0992   0.997 -0.714   -0.618 
## 3 sd       219   0.386   0.405 1.04e-1 1.10e-1   0.0511   0.501 -0.892    0.124 
## 4 total…   219   0.391   0.396 1.19e-1 1.24e-1   0.0999   0.725  0.0355  -0.280 
## 5 total…   219   0.382   0.384 1.19e-1 1.23e-1   0.0942   0.718  0.0607  -0.294 
## 6 alpha…   219   0.975   0.975 8.54e-5 8.82e-5   0.975    0.975 -0.498   -0.0909
## 7 loadi…   219   0.634   0.658 1.70e-1 1.80e-1   0.271    0.975 -0.346   -0.798 
## 8 discr…   219   1.62    1.49  8.61e-1 7.63e-1   0.478    7.49   1.95     8.85  
## 9 diffi…   219  -1.75   -1.71  2.22e+0 2.06e+0 -14.3      2.74  -1.10     3.99

#difficulties
# Plots the `mean` column of itemstats, i.e. the proportion of respondents
# passing each item, with the x axis relabelled as "Pass rate".
good_items_stats$itemstats$mean %>% GG_denhist() +
  scale_x_continuous("Pass rate")

## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items pass rate.png")

#loadings
# Same plot type as for the pass rates, applied to the items' factor loadings.
good_items_stats$itemstats$loading %>% GG_denhist() +
  scale_x_continuous("Factor loading")

## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items factor loading.png")

#reliability
# Factor scores for every respondent, requested together with their standard
# errors (full.scores.SE = TRUE). The empirical reliability printed below is
# computed from this score/SE object.
good_items_fit_scores = fscores(good_items_fit, full.scores.SE = TRUE)
empirical_rxx(good_items_fit_scores)

##    F1 
## 0.972

#which range has >.90?
# Chained assignment: the same object is stored under both `d_rxx_info` and
# `rxx_info`.
d_rxx_info = rxx_info = get_reliability(good_items_fit)

# Reliability (`rel`) plotted against ability level (`z`); y-axis ticks every
# .05 so the .90 threshold can be read off, and the view is limited to
# z in [-4, 4].
d_rxx_info %>% 
  ggplot(aes(z, rel)) +
  geom_line() +
  scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
  scale_x_continuous("Ability level (z)") +
  coord_cartesian(xlim = c(-4, 4))

#merge g score back to main dataset
# Drop any existing `g` column first, so re-running this chunk does not leave
# suffixed duplicates (g.x / g.y) after the join.
d$g = NULL
# Scores are attached by participant id (d$Participant_id == itemdata$id).
# Only the first column of the fscores result is used -- presumably the score
# estimate, with the SE column left out; TODO confirm column order.
d = left_join(
  d,
  tibble(id = itemdata$id, 
         g = good_items_fit_scores[, 1]),
  by = c("Participant_id" = "id")
)

# anyDuplicated() returns 0 when nothing is duplicated, so this asserts that
# the join did not multiply any participant's row.
assert_that(!anyDuplicated(d$Participant_id))

## [1] TRUE

Norms

#the age problem
# Vocabulary score (g) against age, with an additional smoother overlaid.
GG_scatter(d, "age", "g") +
  geom_smooth()

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#scores by age group
# Cut age into 3 bins; the summary table printed further down shows three
# equal-width bands (about 21.4 years each), not equal-sized groups.
d$age_group = discretize(d$age, 3)

# Boxplots of the IRT score within each age band.
GG_group_means(d, "g", "age_group", type = "boxplot") +
  scale_y_continuous("Vocabulary IRT score") +
  scale_x_discrete("Age group")

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

GG_save("figs/good items boxplots by age.png")

describe2(d$g, d$age_group)

## New names:
## • `` -> `...1`

## # A tibble: 3 × 11
##   group       var       n    mean  median    sd   mad   min   max  skew kurtosis
##   <fct>       <chr> <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 [18.9,40.3] ...1    176 -0.390  -0.481  0.832 0.841 -2.27  1.81 0.592   -0.194
## 2 (40.3,61.7] ...1    174  0.0964 -0.0615 0.957 1.09  -1.76  2.55 0.460   -0.685
## 3 (61.7,83.1] ...1     91  0.562   0.529  1.02  1.03  -1.50  3.15 0.184   -0.191

#whites differ in age?
describe2(d$age, d$white_only)

## New names:
## • `` -> `...1`

## # A tibble: 2 × 11
##   group var       n  mean median    sd   mad   min   max   skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
## 1 FALSE ...1    114  41.3     39  14.8  18.5    21    74  0.312    -1.09
## 2 TRUE  ...1    327  48.5     51  15.7  17.8    19    83 -0.121    -1.07

#white subset is easier to work with
# Norming step 1: within the white-only subsample, fit a linear regression of
# the IRT score on age. The outer parentheses print the fitted model.
d_white = d %>% filter(white_only)
(white_ageadj_model = lm(g ~ age, data = d_white))

## 
## Call:
## lm(formula = g ~ age, data = d_white)
## 
## Coefficients:
## (Intercept)          age  
##     -1.1010       0.0251

#get resids, step 1
# g_ageadj1 = observed g minus the g predicted from age, so its mean is ~0.
# Assigning resid() back as a column relies on lm() having used every row of
# d_white (no missing g or age).
d_white$g_ageadj1 = resid(white_ageadj_model)
(ageadj_desc_whites = describe2(d_white$g_ageadj1))

## # A tibble: 1 × 10
##   var       n      mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>     <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 -2.57e-17 -0.0648 0.901 0.958 -2.26  2.60 0.320   -0.281

#alternative is to just model the absolute resids directly
# Norming step 2: regress the absolute age-residual on age, giving an
# age-dependent estimate of how spread out the scores are.
(absSD_ols_whites = lm(abs(g_ageadj1) ~ age, data = d_white))

## 
## Call:
## lm(formula = abs(g_ageadj1) ~ age, data = d_white)
## 
## Coefficients:
## (Intercept)          age  
##     0.46724      0.00544

#get age mean and SD adjusted scores
# Divide each residual by the fitted absolute residual at that person's age.
# The divisor is a mean absolute residual rather than an SD, so the result is
# not yet unit-variance (sd is 1.22 in the summary below); it is rescaled in
# the next step.
d_white$g_ageadj2 = d_white$g_ageadj1 / predict(absSD_ols_whites)

#does this work tho?
# Visual check: the adjusted scores should show no trend with age.
d_white %>% 
  GG_scatter("age", "g_ageadj2")

## `geom_smooth()` using formula = 'y ~ x'

# Formal check for remaining heteroscedasticity of the adjusted scores with
# respect to age (linear and spline fits, raw and rank versions in the output).
test_HS(d_white$g_ageadj2, d_white$age)

## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

## # A tibble: 4 × 5
##   test           r2adj     p fit          log10_p
##   <chr>          <dbl> <dbl> <named list>   <dbl>
## 1 linear raw  -0.00300 0.872 <ols>         0.0594
## 2 spline raw  -0.00246 0.361 <ols>         0.443 
## 3 linear rank -0.00305 0.924 <ols>         0.0343
## 4 spline rank  0.00330 0.164 <ols>         0.785

#restore to white z score norms
# Store the mean and sd of the spread-adjusted scores in the white sample;
# both are reused for standardisation below and inside vocab_to_IQ().
(white_desc_ageadj2_desc = describe2(d_white$g_ageadj2))

## # A tibble: 1 × 10
##   var       n     mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 0.000443 -0.0814  1.22  1.33 -2.59  3.73 0.397   -0.350

# Norming step 3: z-standardise so the white sample has mean 0 and sd 1
# (confirmed by the summary printed below).
d_white$g_ageadj3 = (d_white$g_ageadj2 - white_desc_ageadj2_desc$mean) / white_desc_ageadj2_desc$sd
d_white$g_ageadj3 %>% describe2()

## # A tibble: 1 × 10
##   var       n     mean  median    sd   mad   min   max  skew kurtosis
##   <chr> <dbl>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 x       327 1.50e-17 -0.0668     1  1.08 -2.12  3.05 0.397   -0.350

#Thus finally, we can make IQs using a function
# vocab_to_IQ: convert vocabulary IRT scores to age-normed IQ scores.
#   x    - numeric vector of IRT scores (the `g` column)
#   .age - ages of the same cases
# Returns x re-expressed on the IQ metric (100 + 15 * z), where z is the
# age-adjusted score standardised against the white sample.
# Relies on three objects fitted earlier in this script:
# white_ageadj_model, absSD_ols_whites and white_desc_ageadj2_desc.
vocab_to_IQ = function(x, .age) {
  #step 1: deviation from the score expected at this age
  cases = tibble(age = .age, g = x)
  age_resid = x - predict(white_ageadj_model, newdata = cases)

  #step 2: scale by the spread predicted for this age
  age_spread = predict(absSD_ols_whites, newdata = tibble(age = .age))
  spread_adj = age_resid / age_spread

  #step 3: standardise against the white sample's mean and sd
  norm_mean = white_desc_ageadj2_desc$mean
  norm_sd = white_desc_ageadj2_desc$sd
  z = (spread_adj - norm_mean) / norm_sd

  #IQ metric
  100 + 15 * z
}

# Apply the white-sample norms to every participant in d, not only the
# white subsample the norms were estimated on.
d$IQ = vocab_to_IQ(d$g, d$age)

#and no heteroscedasticity
test_HS(d$IQ, d$age)

## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

## # A tibble: 4 × 5
##   test           r2adj     p fit          log10_p
##   <chr>          <dbl> <dbl> <named list>   <dbl>
## 1 linear raw  -0.00226 0.937 <ols>         0.0282
## 2 spline raw  -0.00808 0.925 <ols>         0.0339
## 3 linear rank -0.00211 0.786 <ols>         0.105 
## 4 spline rank -0.00674 0.803 <ols>         0.0954

Basic stats

#race gaps
# Pairwise standardised mean differences in IQ between groups.
# NOTE(review): the printed matrix is symmetric (e.g. black/white is -0.559 in
# both cells), so an entry's sign does not depend on row vs. column. Compared
# with the group means printed further down, entries look like "earlier factor
# level minus later factor level" -- confirm against SMD_matrix documentation.
SMD_matrix(d$IQ, d$race_combos_common)

##                   black east_asian hispanic   white white, hispanic   Other
## black                NA    -0.6096    0.306 -0.5590          0.0627 -0.6376
## east_asian      -0.6096         NA    0.915  0.0507          0.6723 -0.0279
## hispanic         0.3056     0.9152       NA -0.8646         -0.2429 -0.9432
## white           -0.5590     0.0507   -0.865      NA          0.6217 -0.0786
## white, hispanic  0.0627     0.6723   -0.243  0.6217              NA -0.7003
## Other           -0.6376    -0.0279   -0.943 -0.0786         -0.7003      NA

describe2(d %>% select(IQ), group = d$race_combos_common) %>% df_round(2)

## # A tibble: 6 × 11
##   group          var   n     mean  median sd    mad   min   max   skew  kurtosis
##   <chr>          <chr> <chr> <chr> <chr>  <chr> <chr> <chr> <chr> <chr> <chr>   
## 1 black          IQ    " 47… " 91… " 89.… "14.… "13.… 61.98 120.… 0.34  -0.50   
## 2 east_asian     IQ    " 17… "100… "103.… "16.… "21.… 79.90 134.… 0.30  -1.26   
## 3 hispanic       IQ    " 10… " 86… " 83.… " 9.… " 9.… 74.58 102.… 0.33  -1.61   
## 4 white          IQ    "327… "100… " 99.… "15.… "16.… 68.20 145.… 0.40  -0.35   
## 5 white, hispan… IQ    "  8… " 90… " 91.… " 9.… "13.… 80.15 107.… 0.37  -1.40   
## 6 Other          IQ    " 32… "101… " 98.… "18.… "18.… 75.66 149.… 0.54  -0.53

# Single Black-White gap: element [1, 2] of the two-group matrix for `bw`
# (participants with missing `bw` form the <NA> group in the table below).
SMD_matrix(d$IQ, d$bw)[1, 2]

## [1] -0.568

describe2(d$IQ, d$bw)

## New names:
## • `` -> `...1`

## # A tibble: 3 × 11
##   group var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Black ...1     47  91.5   89.5  14.0  13.9  62.0  120. 0.344   -0.500
## 2 White ...1    327 100     99.0  15    16.3  68.2  146. 0.397   -0.350
## 3 <NA>  ...1     67  97.7   94.6  17.0  17.8  74.6  149. 0.752   -0.115

#plot results to see if they make sense
d %>% 
  GG_denhist("IQ", "bw") +
  scale_fill_discrete("Race") +
  scale_x_continuous("IQ", breaks = seq(0, 200, by = 5))

## Warning in GG_denhist(., "IQ", "bw"): Grouping variable contained missing
## values. These were removed. If you want an NA group, convert to explicit value.

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/dists for blacks and whites.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#adjust for reliability
# Inputs: the observed Black-White gap and the empirical reliability of the
# IRT scores. Presumably disattenuates the gap for measurement error -- the
# exact formula is inside adj_d_reliability(); TODO confirm.
adj_d_reliability(
  SMD_matrix(d$IQ, d$bw)[1, 2],
  empirical_rxx(good_items_fit_scores)
)

##     F1 
## -0.577

Jensen method

#black white gap
# Item-level Black-White gap for Jensen's method. For every item column of
# `itemdata` (all columns except `id`), the pass rate within the white-only
# and black-only groups is converted to a z score with qnorm(), and the gap is
# White minus Black. Infinite gaps (a group pass rate of exactly 0 or 1) are
# passed through inf_to_NA().
# The group flags live in `d`, so item rows are first aligned to `d` by
# participant id -- the same key used to merge g into `d` -- instead of
# assuming that `d` and `itemdata` share a row order. When the two are already
# in the same order the result is unchanged.
good_items_stats$itemstats$bw_gap = local({
  #itemdata row belonging to each row of d
  item_rows = match(d$Participant_id, itemdata$id)

  map_dbl(itemdata %>% select(-id) %>% names(), function(v) {
    #subset to the focus item, and the black and the white pure groups
    focus_item = itemdata[[v]][item_rows]
    focus_item_white = focus_item[d$white_only]
    focus_item_black = focus_item[d$black_only]

    #pass rate to z score
    white_z = wtd_mean(focus_item_white) %>% qnorm()
    black_z = wtd_mean(focus_item_black) %>% qnorm()

    #gap in z
    z_gap = white_z - black_z

    #Inf to NA
    inf_to_NA(z_gap)
  })
})
sum(d$white_only)

## [1] 327

sum(d$black_only)

## [1] 47

#example
itemdata[NA_to_F(d$bw == "White"), 1] %>% mean()

## [1] 0.847

itemdata[NA_to_F(d$bw == "Black"), 1] %>% mean()

## [1] 0.766

itemdata[NA_to_F(d$bw == "White"), 1] %>% mean() %>% qnorm()

## [1] 1.02

itemdata[NA_to_F(d$bw == "Black"), 1] %>% mean() %>% qnorm()

## [1] 0.726

itemdata[NA_to_F(d$bw == "White"), 1] %>% mean() %>% qnorm() - itemdata[NA_to_F(d$bw == "Black"), 1] %>% mean() %>% qnorm()

## [1] 0.298

#spearman jensen method
# One point per item: factor loading (x) against the item's Black-White gap
# (y), labelled with the item name taken from the row names.
# Note: bw_gap was computed above as qnorm(white pass rate) minus
# qnorm(black pass rate), so the "d" in the axis title is a z-scale gap with
# positive values meaning a higher White pass rate.
good_items_stats$itemstats %>% 
  rownames_to_column() %>% 
  GG_scatter("loading", "bw_gap", case_names = "rowname") +
  scale_x_continuous("Factor loading", breaks = seq(-1, 1, .1)) +
  scale_y_continuous("Black-White gap, d")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/Jensen method items.png")

## `geom_smooth()` using formula = 'y ~ x'

#gap at perfect reliability
# Regress the item gap on the item loading. Items whose gap is NA are dropped
# by the fit (3 items, per the missing-value table in the output).
(jensen_method_ols = ols(bw_gap ~ loading, data = good_items_stats$itemstats))

## Frequencies of Missing Values Due to Each Variable
##  bw_gap loading 
##       3       0 
## 
## Linear Regression Model
## 
## ols(formula = bw_gap ~ loading, data = good_items_stats$itemstats)
## 
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     216    LR chi2     23.13    R2       0.102    
## sigma0.2258    d.f.            1    R2 adj   0.097    
## d.f.    214    Pr(> chi2) 0.0000    g        0.087    
## 
## Residuals
## 
##       Min        1Q    Median        3Q       Max 
## -0.693528 -0.137776  0.002642  0.135475  0.920415 
## 
## 
##           Coef   S.E.   t    Pr(>|t|)
## Intercept 0.1200 0.0594 2.02 0.0447  
## loading   0.4470 0.0909 4.92 <0.0001

# Extrapolate the fitted line to loading = 1, i.e. the gap expected for a
# hypothetical item that measures the factor perfectly.
predict(jensen_method_ols, newdata = tibble(loading = 1))

##     1 
## 0.567

# Same regression forced through the origin (`0 +` removes the intercept), so
# the slope itself is the predicted gap at loading = 1.
lm(bw_gap ~ 0 + loading, data = good_items_stats$itemstats)

## 
## Call:
## lm(formula = bw_gap ~ 0 + loading, data = good_items_stats$itemstats)
## 
## Coefficients:
## loading  
##   0.624

Black-White DIF

#black-white bias
# DIF analysis of all items (every itemdata column except `id`) with `bw` as
# the grouping variable and a 1-factor model; NCYCLES = 2000 is passed through
# as a technical setting of the fit.
# The result is cached in "DIF_test.rds": the output below shows it being read
# from disk rather than refitted. NOTE(review): the cache is presumably not
# invalidated automatically -- delete the file if itemdata or d$bw change.
# `group = d$bw` also assumes rows of d and itemdata are in the same order.
bw_bias_fit = cache_object(
  expr = {DIF_test(
    items = itemdata %>% select(-id),
    model = 1,
    group = d$bw,
    technical = list(NCYCLES = 2000)
  )
  }, filename = "DIF_test.rds")

## Cache found, reading object from disk

# Items whose DIF test has an unadjusted p below .05
filter(bw_bias_fit$DIF_stats, p < 0.05)

##                                item      groups converged    AIC   SABIC     HQ
## 1  a_type_of_mathematical_operation Black,White      TRUE  -3.63  -2.132 -0.519
## 2                          auspices Black,White      TRUE  -2.94  -1.435  0.178
## 3                            berate Black,White      TRUE  -2.68  -1.176  0.437
## 4                             cheat Black,White      TRUE  -2.38  -0.875  0.739
## 5                     embarrassment Black,White      TRUE  -5.83  -4.328 -2.715
## 6                            empire Black,White      TRUE  -2.57  -1.069  0.544
## 7                            gutter Black,White      TRUE  -2.04  -0.534  1.079
## 8                     indescribable Black,White      TRUE  -5.18  -3.678 -2.065
## 9                              meal Black,White      TRUE -12.38 -10.872 -9.259
## 10                         mutually Black,White      TRUE  -2.28  -0.780  0.833
## 11                      nonsensical Black,White      TRUE  -4.00  -2.492 -0.878
## 12                       referendum Black,White      TRUE  -2.07  -0.566  1.047
## 13                           summit Black,White      TRUE  -2.83  -1.328  0.285
## 14                           2of5_9 Black,White      TRUE  -3.76  -2.255 -0.642
## 15                          2of5_11 Black,White      TRUE  -5.62  -4.112 -2.498
## 16                          2of5_22 Black,White      TRUE  -2.57  -1.067  0.547
## 17                           3of5_5 Black,White      TRUE  -2.68  -1.177  0.436
## 18                          3of5_13 Black,White      TRUE  -4.92  -3.421 -1.808
## 19                          3of5_25 Black,White      TRUE  -7.46  -5.961 -4.347
## 20                          3of5_32 Black,White      TRUE  -5.55  -4.046 -2.433
##       BIC     X2 df     p number p_adj
## 1   4.213  7.635  2 0.022     10 1.000
## 2   4.910  6.938  2 0.031     27 1.000
## 3   5.169  6.679  2 0.035     30 1.000
## 4   5.471  6.378  2 0.041     39 1.000
## 5   2.017  9.831  2 0.007     66 1.000
## 6   5.277  6.572  2 0.037     68 1.000
## 7   5.812  6.037  2 0.049     85 1.000
## 8   2.667  9.181  2  0.01     91 1.000
## 9  -4.526 16.375  2     0     97 0.061
## 10  5.565  6.284  2 0.043     99 1.000
## 11  3.854  7.995  2 0.018    101 1.000
## 12  5.779  6.069  2 0.048    113 1.000
## 13  5.017  6.831  2 0.033    139 1.000
## 14  4.090  7.758  2 0.021    163 1.000
## 15  2.234  9.615  2 0.008    165 1.000
## 16  5.279   6.57  2 0.037    176 1.000
## 17  5.168   6.68  2 0.035    195 1.000
## 18  2.924  8.924  2 0.012    202 1.000
## 19  0.385 11.464  2 0.003    212 0.707
## 20  2.300  9.549  2 0.008    219 1.000

# Items whose DIF test is still below .05 on the adjusted p-value (p_adj)
filter(bw_bias_fit$DIF_stats, p_adj < 0.05)

##  [1] item      groups    converged AIC       SABIC     HQ        BIC      
##  [8] X2        df        p         number    p_adj    
## <0 rows> (or 0-length row.names)

# DIF effect-size tables stored on the fit; prints one table each for $liberal and $conservative
bw_bias_fit$effect_size_test

## $liberal
##           Effect Size  Value
## 1                STDS 0.6593
## 2                UTDS 2.8640
## 3              UETSDS 0.6699
## 4               ETSSD 0.0206
## 5         Starks.DTFR 0.6294
## 6               UDTFR 2.8953
## 7              UETSDN 0.6364
## 8 theta.of.max.test.D 4.9240
## 9           Test.Dmax 2.1516
## 
## $conservative
##           Effect Size Value
## 1                STDS 0.000
## 2                UTDS 0.000
## 3              UETSDS 0.000
## 4               ETSSD 0.000
## 5         Starks.DTFR 0.000
## 6               UDTFR 0.000
## 7              UETSDN 0.000
## 8 theta.of.max.test.D 0.971
## 9           Test.Dmax 0.000

# Add an item_number column (from seq_along_rows()) that the trace plots use
# to pick items via which.items
bw_bias_fit$DIF_stats$item_number <- seq_along_rows(bw_bias_fit$DIF_stats)

# Item trace plots from the conservative-anchor fit
plot(bw_bias_fit$fits$anchor_conservative, type = "trace")

# Trace plots for items flagged at p_adj < .05 (conservative anchors).
# Piping the selection straight into plot(which.items = ...) was noted as not
# working, presumably because no item is flagged, so the selection is checked
# first and a message is emitted when it is empty.
filtered_items <- pull(filter(bw_bias_fit$DIF_stats, p_adj < .05), item_number)

if (length(filtered_items) == 0) {
  message("No items flagged for DIF at p_adj < .05.")
} else {
  plot(bw_bias_fit$fits$anchor_conservative, type = "trace", which.items = filtered_items)
}

## No items flagged for DIF at p_adj < .05.

# Trace plots from the liberal-anchor fit for items with unadjusted p < .05.
# The selection is checked first so an empty which.items is never handed to
# plot(); when no item is flagged a message is emitted instead of plotting.
liberal_dif_items <- bw_bias_fit$DIF_stats %>% filter(p < .05) %>% pull(item_number)

if (length(liberal_dif_items) > 0) {
  bw_bias_fit$fits$anchor_liberal %>% plot(type = "trace", which.items = liberal_dif_items)
} else {
  message("No items flagged for DIF at p < .05.")
}

# Test-level bias: score plot from the conservative-anchor fit
plot(bw_bias_fit$fits$anchor_conservative, type = "score")

# Black-White difference on d$IQ: SMD_matrix() prints a group-by-group matrix
# (presumably standardized mean differences -- confirm in the kirkegaard docs)
SMD_matrix(d$IQ, d$bw)

##        Black  White
## Black     NA -0.568
## White -0.568     NA

# descriptive statistics of d$IQ within each d$bw group (including the NA group);
# the unnamed vector input is presumably why the `var` column prints as `...1`
describe2(d$IQ, d$bw)

## New names:
## • `` -> `...1`

## # A tibble: 3 × 11
##   group var       n  mean median    sd   mad   min   max  skew kurtosis
##   <fct> <chr> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Black ...1     47  91.5   89.5  14.0  13.9  62.0  120. 0.344   -0.500
## 2 White ...1    327 100     99.0  15    16.3  68.2  146. 0.397   -0.350
## 3 <NA>  ...1     67  97.7   94.6  17.0  17.8  74.6  149. 0.752   -0.115

Race (black-white) differences in an English vocabulary test using an online Prolific sample (corrected)

Meng Hu

2025-02-26

The “item data” rds and “main data” rds files used in Kirkegaard’s main analysis are available here: https://osf.io/6gcy4/

Init

Functions

Data

Encoding

Analysis

Basic

Overall fit

Norms

Basic stats

Jensen method

Black-White DIF