This analysis handles bad data by removing it. 58 individuals failed either the attention-check items (4 items) or items that are exceedingly easy (4 items), which reflects low motivation and/or poor English. These 8 items are also removed from the analysis. Results are similar to those of the previous analysis, which included all individuals.
Sys.setenv(LANG = "en") # set R messages to English
library(ggpubr)
library(kirkegaard)
library(tictoc)
load_packages(
readxl,
mirt,
googlesheets4,
rms,
ggeffects,
future, furrr
)
theme_set(theme_bw())
options(
digits = 3
)
mirtCluster()
## mirtCluster() previously defined for 31 clusters
plan(multisession(workers = 7))
#set renew_all = T to delete the cache and re-run everything (do this if you have run this code before, or else the cache will contain outdated data)
renew_all = F
# The commented-out code below does the same thing by clearing the cached RDS files manually
# List of RDS files to be cleared
# rds_files <- c(
# "data/vocab_irt.rds",
# "data/hard_items_fit.rds",
# "data/all_items_fit.rds",
# "data/good_items_fit.rds",
# "data/sex_bias_fit.rds",
# "data/vocab_abbrev_rc50.rds"
# )
# Function to remove RDS files if they exist
clear_rds_files <- function(files) {
for (file in files) {
if (file.exists(file)) {
file.remove(file)
message(paste("Removed:", file))
} else {
message(paste("File not found:", file))
}
}
}
# Clear the RDS files
# clear_rds_files(rds_files)
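# cache_object() (from kirkegaard) evaluates `expr` once, writes the result to
# `filename`, and on later runs reads it back from disk unless renew = TRUE.
# A minimal sketch (hypothetical file name; commented out so nothing is written):
# demo_fit = cache_object(
#   filename = "data/demo_object.rds",
#   expr = rnorm(10),
#   renew = renew_all
# )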
#coverage of a given minimum of reliability
reliability_range = function(x, min_reliability) {
map_df(min_reliability, \(m) {
x %>%
filter(rel >= m) %$%
tibble(
min_reliability = m,
lower_z = min(z),
upper_z = max(z),
coverage = pnorm(upper_z) - pnorm(lower_z)
)
})
}
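#a minimal sketch with made-up conditional reliabilities; the input is assumed to
#be a data frame with an ability grid `z` and the reliability `rel` at each point
demo_rel = tibble(
  z = -4:4,
  rel = c(.55, .75, .88, .93, .95, .93, .87, .72, .50)
)
reliability_range(demo_rel, min_reliability = c(.80, .90))
#one row per threshold: the z-range where rel >= threshold and the share of a
#standard normal population inside it (e.g. pnorm(1) - pnorm(-1), about 68%, for [-1, 1])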
#find item choices based on item names
find_items = function(x) {
items = map_chr(x, function(y) {
#item is from different objects depending on the name
if (!str_detect(y, "of5_")) {
easy_items_choices[y]
} else if (str_detect(y, "1of5_")) {
hard_items_1of5_options[y]
} else if (str_detect(y, "2of5_")) {
hard_items_2of5_options[y]
} else if (str_detect(y, "3of5_")) {
hard_items_3of5_options[y]
}
})
names(items) = x
items
}
#items whose names contain "xof5_" come from the hard pool; all other names come from the easy pool
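#hypothetical usage, assuming the *_choices/*_options lookup vectors are in scope:
# find_items(c("berate", "2of5_7", "3of5_1"))
#returns each item's answer options, pulled from the easy-item choices for plain
#names and from the 1/2/3-of-5 hard pools for names containing "xof5_"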
if (F) {
d_prolific = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\prolific_export_64877cb0ca7d649ce538e74d.csv") %>% df_legalize_names()
d_alchemer = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\20230618204902-SurveyExport.csv") %>% df_legalize_names() %>% filter(Status == "Complete")
assert_that(!any(duplicated(d_prolific$Participant_id)))
assert_that(!any(duplicated(d_alchemer$Write_in_your_Prolific_ID)))
d_alchemer$Write_in_your_Prolific_ID %>% table2()
#join on prolific id
d = inner_join(
d_prolific,
d_alchemer,
by = c("Participant_id" = "Write_in_your_Prolific_ID")
)
assert_that(!any(duplicated(d$Participant_id)))
d %>%
select(
-Submission_id,
-IP_Address,
-Referer,
-SessionID,
-User_Agent
) %>%
write_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\raw data.rds", compress = "xz")
}
#load prepared data without sensitive information
d = read_rds("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\raw data.rds")
d$native_speaker = d$Is_English_one_of_your_native_languages == "Yes"
table2(d$native_speaker)
## # A tibble: 3 × 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 TRUE 491 98.4
## 2 FALSE 8 1.60
## 3 <NA> 0 0
#recode covariates
d$age = d$How_old_are_you %>% str_match("\\d+") %>% as.numeric()
d$sex = d$What_is_your_biological_sex %>% mapvalues(from = c("Male (Y chromosome)", "Female (no Y chromosome)"), to = c("Male", "Female")) %>% factor(levels = c("Male", "Female"))
ethnicity_vars = d %>% select(White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with:Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with) %>% names()
d$white = d$White_European_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$jewish = d$Jewish_Ashkenazi_Sephardic_Misrahi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$black = d$Black_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$hispanic = d$Hispanic_Latino_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$middle_eastern = d$Middle_Eastern_North_African_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$south_asian = d$South_Asian_Indian_subcontinent_excluding_Pakistani_and_Bangladeshi_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$southest_asian = d$Southeast_Asian_Bangladeshi_Vietnamese_Burmese_etc_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$east_asian = d$East_Asian_Chinese_Korean_Japanese_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$polynesian = d$Polynesian_Pacific_Islander_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$native_american = d$Native_American_Amerindian_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
d$other_race = d$Other_Which_of_the_following_racial_ethnic_groups_do_you_identify_with %>% is.na() %>% `!`() %>% factor()
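#the recodes above exploit the checkbox export format: a column contains text when
#the box was ticked and NA otherwise, so !is.na() yields the group indicator
demo_checkbox = c("White/European", NA, NA, "White/European")
demo_checkbox %>% is.na() %>% `!`() %>% factor()
#gives a two-level factor (FALSE/TRUE) flagging who ticked the box, as used above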
ethnicity_simple_vars = d %>% select(white:other_race) %>% names() %>% factor()
d$white_only = as.logical(d$white) & (d %>% select(white:other_race) %>% select(-white) %>% map_df(as.logical) %>% rowSums() %>% equals(0))
d$black_only = as.logical(d$black) & (d %>% select(white:other_race) %>% select(-black) %>% map_df(as.logical) %>% rowSums() %>% equals(0))
#combinations
d$race_combos = d %>% select(white:other_race) %>% encode_combinations()
d$race_combos %>% table2()
## # A tibble: 24 × 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 white 359 71.9
## 2 black 63 12.6
## 3 east_asian 19 3.81
## 4 hispanic 14 2.81
## 5 white, hispanic 9 1.80
## 6 south_asian 5 1.00
## 7 white, jewish 5 1.00
## 8 southest_asian 4 0.802
## 9 black, native_american 2 0.401
## 10 middle_eastern 2 0.401
## # ℹ 14 more rows
#common combos
d$race_combos_common = d$race_combos %>% fct_lump_min(min = 9)
d$race_combos_common %>% table2()
## # A tibble: 7 × 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 white 359 71.9
## 2 black 63 12.6
## 3 Other 35 7.01
## 4 east_asian 19 3.81
## 5 hispanic 14 2.81
## 6 white, hispanic 9 1.80
## 7 <NA> 0 0
d$sex %>% table2()
## # A tibble: 3 × 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Female 260 52.1
## 2 Male 239 47.9
## 3 <NA> 0 0
#table
d_vars = df_var_table(d)
items_of_interest <- c("silly", "avoid", "remove", "construct")
answer_counts <- d %>%
select(all_of(items_of_interest)) %>%
pivot_longer(cols = everything(), names_to = "item", values_to = "response") %>%
filter(!is.na(response)) %>%
group_by(item, response) %>%
summarise(count = n(), .groups = "drop") %>%
arrange(item, desc(count))
print(answer_counts)
## # A tibble: 17 × 3
## item response count
## <chr> <chr> <int>
## 1 avoid evade 493
## 2 avoid ddd 2
## 3 avoid ggg 2
## 4 avoid fff 1
## 5 avoid hhh 1
## 6 construct create 496
## 7 construct ggg 2
## 8 construct ddd 1
## 9 remove abolish 493
## 10 remove ddd 2
## 11 remove fff 2
## 12 remove hhh 2
## 13 silly childish 491
## 14 silly hhh 3
## 15 silly ddd 2
## 16 silly ggg 2
## 17 silly fff 1
d <- d %>% filter(silly == "childish" & avoid == "evade" & remove == "abolish" & construct == "create" & evil == "wicked" & cheerful == "happy" & aroma == "odor" & farewell == "adieu") # keep only people who passed all 4 attention checks and all 4 extremely easy items
nrow(d) # 441 cases
## [1] 441
easy_items = d %>% select(evil:cheat) %>% select(-c(aroma, cheerful, evil, farewell)) # skip the 4 attention checks and 4 obvious answers
# Easy items
scoring_key = read_excel("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\answer keys for 155 items.xlsx", col_names = c("word", "correct", "notes"))
items_to_remove <- c("silly", "avoid", "remove", "construct", "aroma", "cheerful", "evil", "farewell")
scoring_key <- scoring_key %>%
filter(!word %in% items_to_remove) %>%
arrange(word)
nrow(scoring_key) # 151 items
## [1] 151
#sort data colnames alphabetically
colnames_clean = easy_items %>% colnames() %>% str_clean()
order(colnames_clean)
## [1] 71 32 80 56 53 125 137 4 34 39 132 104 116 1 65 87 110 121 79 130 90 22 97 37 41 81 148 75 44 94 27
## [32] 108 21 147 144 96 47 129 151 59 9 43 66 51 101 42 60 54 100 38 33 123 12 92 103 7 114 143 6 95 118 112
## [63] 61 124 19 26 3 57 14 141 150 76 8 83 122 68 16 24 105 28 35 117 78 46 30 49 86 62 23 99 142 18 72
## [94] 5 36 128 11 140 91 10 20 52 69 134 50 77 70 48 2 40 29 107 63 149 109 145 67 88 82 84 106 146 45 131
## [125] 133 120 64 102 119 127 111 115 15 93 17 58 55 98 136 89 113 13 73 139 138 25 126 135 31 85 74
easy_items_sorted = easy_items[, order(colnames_clean)]
#check matches
vocab_matches = tibble(
scoring_key_word = scoring_key$word,
scoring_key_correct = scoring_key$correct,
data = easy_items_sorted %>% colnames()
)
#stubborn is duplicated, but fortunately in the right order
easy_items_scored = score_items(
easy_items_sorted,
scoring_key$correct
)
easy_items_table = map_df(easy_items, table2)
#count of correct
d$vocab_sumscore = rowSums(easy_items_scored)
vocab_irt = cache_object(filename = "data/vocab_irt.rds", expr = mirt(
easy_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000),
optimizer = "BFGS",
method = "EM",
guess = .20 # lower asymptote fixed at chance level (presumably 1 in 5 answer options)
),
renew = renew_all)
## Cache found, reading object from disk
#results
vocab_irt
##
## Call:
## mirt(data = easy_items_scored, model = 1, itemtype = "2PL", guess = 0.2,
## method = "EM", optimizer = "BFGS", technical = list(NCYCLES = 5000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 301 EM iterations.
## mirt version: 1.42
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -23138
## Estimated parameters: 302
## AIC = 46880
## BIC = 48115; SABIC = 47157
## G2 (1e+10) = 40930, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
vocab_irt %>% summary()
## F1 h2
## a_specific_number 0.460 0.2116
## a_type_of_brass_instrument 0.846 0.7153
## a_type_of_drapery 0.663 0.4401
## a_type_of_fabric_101 0.718 0.5152
## a_type_of_fabric_98 0.744 0.5534
## a_type_of_grasshopper 0.819 0.6704
## a_type_of_hat 0.775 0.6003
## a_type_of_heating_unit 0.517 0.2670
## a_type_of_magistrate_position 0.749 0.5613
## a_type_of_mathematical_operation 0.548 0.3001
## a_type_of_overshoe 0.569 0.3237
## a_type_of_plant 0.760 0.5783
## a_type_of_religious_teacher 0.765 0.5853
## a_type_of_rock 0.327 0.1066
## a_type_of_skirts 0.670 0.4490
## a_type_of_smokeless_powder 0.481 0.2312
## a_type_of_sound 0.551 0.3033
## a_type_of_sword 0.424 0.1795
## a_type_of_volcanic_crater 0.641 0.4114
## a_type_of_wavy_form 0.784 0.6147
## actionable_negligence 0.759 0.5757
## advisory 0.645 0.4166
## advocate 0.717 0.5142
## almighty 0.863 0.7449
## amazement 0.622 0.3867
## amenability 0.755 0.5702
## auspices 0.821 0.6733
## barely_able_to_read_and_write 0.829 0.6877
## because 0.674 0.4540
## berate 0.821 0.6746
## blade 0.735 0.5404
## blend 0.662 0.4381
## blunder 0.802 0.6424
## bow 0.859 0.7387
## brief 0.528 0.2790
## bring_about 0.782 0.6118
## carelessly_or_hastily_put_together 0.610 0.3717
## celebration 0.727 0.5287
## cheat 0.841 0.7066
## clay_pigeon_shooting 0.735 0.5403
## coarse 0.760 0.5771
## collect_or_study_insects 0.624 0.3888
## colossal 0.725 0.5262
## commotion 0.916 0.8392
## complainer 0.820 0.6720
## confiscate 0.905 0.8183
## congratulate 0.594 0.3531
## contemplate 0.456 0.2081
## convoy 0.889 0.7912
## couch 0.696 0.4842
## crease 0.755 0.5701
## cunning 0.662 0.4378
## deceit 0.626 0.3919
## deliberately 0.776 0.6022
## deprive 0.617 0.3807
## detectable 0.324 0.1053
## diatribes 0.683 0.4662
## disjoined 0.422 0.1783
## disrupt 0.747 0.5579
## distinct 0.272 0.0741
## divergence 0.851 0.7246
## dome 0.576 0.3317
## downfall 0.729 0.5316
## drink 0.901 0.8115
## elite 0.715 0.5105
## embarrassment 0.588 0.3453
## emphasize 0.820 0.6716
## empire 0.522 0.2729
## empty 0.735 0.5400
## entanglement 0.785 0.6162
## environment 0.841 0.7070
## excite 0.793 0.6283
## flammable 0.871 0.7590
## flatterer 0.868 0.7532
## flatteries 0.668 0.4459
## forever 0.791 0.6263
## frequent 0.140 0.0197
## gigantic 0.710 0.5045
## girl 0.762 0.5803
## goo 0.668 0.4460
## goodbye 0.804 0.6463
## greed 0.916 0.8382
## groan 0.814 0.6626
## gruesome 0.667 0.4454
## guarantee 0.282 0.0794
## gutter 0.631 0.3982
## harmfulness 0.493 0.2428
## hobby 0.799 0.6383
## hut 0.775 0.6007
## illness 0.538 0.2896
## impromptu 0.753 0.5677
## indescribable 0.717 0.5139
## intellectual 0.707 0.4998
## jargon 0.903 0.8161
## knowledgeable 0.829 0.6869
## lackadaisical 0.654 0.4280
## manager 0.849 0.7200
## meal 0.916 0.8386
## melodic 0.741 0.5483
## mutually 0.836 0.6984
## nonsense 0.847 0.7176
## nonsensical 0.812 0.6588
## not_coveted 0.669 0.4469
## pamper 0.819 0.6711
## penitentiary 0.553 0.3062
## perplexing 0.658 0.4328
## persistence 0.639 0.4090
## predetermine 0.524 0.2746
## pretender 0.798 0.6364
## questioning 0.641 0.4113
## quickly 0.667 0.4455
## rebellious 0.799 0.6391
## referee 0.739 0.5462
## referendum 0.695 0.4834
## relating_to_the_right 0.401 0.1610
## relevant 0.753 0.5668
## respectful 0.760 0.5775
## retailer 0.593 0.3519
## retract 0.519 0.2696
## ropes 0.802 0.6440
## sag 0.625 0.3907
## schemer 0.791 0.6258
## seize 0.775 0.6006
## sensitivity 0.737 0.5426
## shadows 0.723 0.5234
## sketch 0.697 0.4861
## slang 0.812 0.6599
## slave 0.507 0.2570
## sluggish 0.670 0.4492
## somber 0.702 0.4931
## spinelessness 0.628 0.3939
## sporadic 0.890 0.7926
## squad 0.943 0.8887
## stagger 0.664 0.4412
## stinking 0.906 0.8203
## stroll 0.900 0.8092
## stubborn_100 0.539 0.2902
## stubborn_143 0.639 0.4078
## stylish 0.914 0.8358
## summit 0.744 0.5539
## terminology 0.878 0.7715
## the_science_of_speech_sounds 0.708 0.5020
## transportation 0.367 0.1349
## tyrant 0.874 0.7643
## unhealthful 0.647 0.4180
## vile 0.875 0.7656
## vulgar 0.894 0.7988
## wandering 0.522 0.2729
## warning 0.621 0.3854
## wave 0.727 0.5283
## weaponry 0.681 0.4642
##
## SS loadings: 77.8
## Proportion Var: 0.515
##
## Factor correlations:
##
## F1
## F1 1
vocab_irt %>% coef(simplify = T)
## $items
## a1 d g u
## a_specific_number 0.882 1.610 0.2 1
## a_type_of_brass_instrument 2.698 4.314 0.2 1
## a_type_of_drapery 1.509 1.782 0.2 1
## a_type_of_fabric_101 1.755 3.420 0.2 1
## a_type_of_fabric_98 1.894 1.340 0.2 1
## a_type_of_grasshopper 2.427 2.352 0.2 1
## a_type_of_hat 2.086 -2.000 0.2 1
## a_type_of_heating_unit 1.027 1.959 0.2 1
## a_type_of_magistrate_position 1.925 2.926 0.2 1
## a_type_of_mathematical_operation 1.115 2.647 0.2 1
## a_type_of_overshoe 1.178 0.231 0.2 1
## a_type_of_plant 1.993 2.492 0.2 1
## a_type_of_religious_teacher 2.022 2.449 0.2 1
## a_type_of_rock 0.588 2.807 0.2 1
## a_type_of_skirts 1.536 1.840 0.2 1
## a_type_of_smokeless_powder 0.933 0.958 0.2 1
## a_type_of_sound 1.123 0.511 0.2 1
## a_type_of_sword 0.796 0.000 0.2 1
## a_type_of_volcanic_crater 1.423 2.042 0.2 1
## a_type_of_wavy_form 2.150 0.265 0.2 1
## actionable_negligence 1.982 2.264 0.2 1
## advisory 1.438 3.705 0.2 1
## advocate 1.751 1.251 0.2 1
## almighty 2.908 5.449 0.2 1
## amazement 1.352 2.775 0.2 1
## amenability 1.960 2.132 0.2 1
## auspices 2.443 -0.851 0.2 1
## barely_able_to_read_and_write 2.526 3.199 0.2 1
## because 1.552 2.836 0.2 1
## berate 2.450 2.833 0.2 1
## blade 1.846 3.592 0.2 1
## blend 1.503 2.804 0.2 1
## blunder 2.281 4.692 0.2 1
## bow 2.861 0.075 0.2 1
## brief 1.059 -0.158 0.2 1
## bring_about 2.137 2.713 0.2 1
## carelessly_or_hastily_put_together 1.309 2.093 0.2 1
## celebration 1.803 1.342 0.2 1
## cheat 2.641 1.421 0.2 1
## clay_pigeon_shooting 1.845 3.698 0.2 1
## coarse 1.988 3.786 0.2 1
## collect_or_study_insects 1.357 3.022 0.2 1
## colossal 1.794 1.898 0.2 1
## commotion 3.889 3.976 0.2 1
## complainer 2.436 2.559 0.2 1
## confiscate 3.611 4.538 0.2 1
## congratulate 1.257 2.274 0.2 1
## contemplate 0.872 2.238 0.2 1
## convoy 3.313 2.184 0.2 1
## couch 1.649 1.258 0.2 1
## crease 1.960 2.124 0.2 1
## cunning 1.502 1.493 0.2 1
## deceit 1.366 1.041 0.2 1
## deliberately 2.094 2.018 0.2 1
## deprive 1.334 1.802 0.2 1
## detectable 0.584 3.263 0.2 1
## diatribes 1.591 0.153 0.2 1
## disjoined 0.793 -0.608 0.2 1
## disrupt 1.912 4.194 0.2 1
## distinct 0.482 1.599 0.2 1
## divergence 2.761 1.184 0.2 1
## dome 1.199 0.986 0.2 1
## downfall 1.813 2.852 0.2 1
## drink 3.532 2.321 0.2 1
## elite 1.738 2.188 0.2 1
## embarrassment 1.236 2.123 0.2 1
## emphasize 2.434 5.042 0.2 1
## empire 1.043 1.478 0.2 1
## empty 1.844 2.243 0.2 1
## entanglement 2.156 -0.359 0.2 1
## environment 2.644 -0.933 0.2 1
## excite 2.213 1.108 0.2 1
## flammable 3.020 7.082 0.2 1
## flatterer 2.973 3.064 0.2 1
## flatteries 1.527 1.037 0.2 1
## forever 2.203 2.708 0.2 1
## frequent 0.241 3.562 0.2 1
## gigantic 1.717 3.767 0.2 1
## girl 2.001 2.695 0.2 1
## goo 1.527 4.174 0.2 1
## goodbye 2.301 4.245 0.2 1
## greed 3.873 0.885 0.2 1
## groan 2.385 3.581 0.2 1
## gruesome 1.525 1.879 0.2 1
## guarantee 0.500 2.440 0.2 1
## gutter 1.384 3.210 0.2 1
## harmfulness 0.964 1.907 0.2 1
## hobby 2.261 2.937 0.2 1
## hut 2.088 2.944 0.2 1
## illness 1.087 0.755 0.2 1
## impromptu 1.951 0.254 0.2 1
## indescribable 1.750 3.903 0.2 1
## intellectual 1.701 3.537 0.2 1
## jargon 3.586 6.195 0.2 1
## knowledgeable 2.521 2.406 0.2 1
## lackadaisical 1.472 1.049 0.2 1
## manager 2.729 6.548 0.2 1
## meal 3.880 0.217 0.2 1
## melodic 1.875 2.530 0.2 1
## mutually 2.590 5.263 0.2 1
## nonsense 2.713 3.275 0.2 1
## nonsensical 2.365 5.236 0.2 1
## not_coveted 1.530 2.076 0.2 1
## pamper 2.431 -1.368 0.2 1
## penitentiary 1.131 2.619 0.2 1
## perplexing 1.487 2.696 0.2 1
## persistence 1.416 2.108 0.2 1
## predetermine 1.047 2.530 0.2 1
## pretender 2.252 6.440 0.2 1
## questioning 1.423 2.586 0.2 1
## quickly 1.526 1.461 0.2 1
## rebellious 2.265 2.390 0.2 1
## referee 1.867 2.902 0.2 1
## referendum 1.646 -1.498 0.2 1
## relating_to_the_right 0.746 0.578 0.2 1
## relevant 1.947 0.903 0.2 1
## respectful 1.990 2.453 0.2 1
## retailer 1.254 1.425 0.2 1
## retract 1.034 2.021 0.2 1
## ropes 2.289 2.696 0.2 1
## sag 1.363 0.870 0.2 1
## schemer 2.201 -0.047 0.2 1
## seize 2.087 2.639 0.2 1
## sensitivity 1.854 1.115 0.2 1
## shadows 1.784 0.718 0.2 1
## sketch 1.655 0.955 0.2 1
## slang 2.371 2.278 0.2 1
## slave 1.001 1.139 0.2 1
## sluggish 1.537 1.102 0.2 1
## somber 1.679 0.479 0.2 1
## spinelessness 1.372 1.751 0.2 1
## sporadic 3.327 0.969 0.2 1
## squad 4.809 11.250 0.2 1
## stagger 1.512 1.304 0.2 1
## stinking 3.637 3.821 0.2 1
## stroll 3.505 1.631 0.2 1
## stubborn_100 1.088 1.922 0.2 1
## stubborn_143 1.412 2.221 0.2 1
## stylish 3.840 -1.702 0.2 1
## summit 1.897 2.434 0.2 1
## terminology 3.127 1.778 0.2 1
## the_science_of_speech_sounds 1.709 4.094 0.2 1
## transportation 0.672 0.331 0.2 1
## tyrant 3.064 -3.522 0.2 1
## unhealthful 1.443 -1.265 0.2 1
## vile 3.076 5.142 0.2 1
## vulgar 3.391 0.034 0.2 1
## wandering 1.043 -0.228 0.2 1
## warning 1.348 2.430 0.2 1
## wave 1.801 1.924 0.2 1
## weaponry 1.584 1.913 0.2 1
##
## $means
## F1
## 0
##
## $cov
## F1
## F1 1
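#mirt's slope-intercept parameterization: P(correct | theta) = g + (u - g) * plogis(a1 * theta + d)
#e.g. for a_type_of_sword (a1 = 0.796, d = 0, g = .2, u = 1), an average taker (theta = 0)
#has a pass probability of .2 + .8 * plogis(0) = .2 + .8 * .5 = .6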
vocab_irt_scores = fscores(vocab_irt, full.scores.SE = T)
empirical_rxx(vocab_irt_scores)
## F1
## 0.958
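#empirical_rxx() presumably computes marginal (empirical) reliability from the EAP
#scores and their standard errors, roughly var(theta) / (var(theta) + mean(SE^2)).
#A hand-rolled check (commented out; see ?mirt::empirical_rxx for the exact formula):
# var(vocab_irt_scores[, 1]) /
#   (var(vocab_irt_scores[, 1]) + mean(vocab_irt_scores[, 2]^2))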
plot(vocab_irt, type = "rxx")
d$g_easy = vocab_irt_scores[, 1] %>% standardize()
vocab_item_stats = itemstats(easy_items_scored)
vocab_item_stats$itemstats$g_loading = vocab_irt %>% summary() %>% .$rotF %>% .[, 1]
vocab_item_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
vocab_item_stats$itemstats$mean %>% describe2()
## # A tibble: 1 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 x 151 0.806 0.848 0.141 0.124 0.333 0.993 -1.14 0.933
GG_scatter(d, "vocab_sumscore", "g_easy")
## `geom_smooth()` using formula = 'y ~ x'
GG_denhist(d, "vocab_sumscore")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_denhist(d, "g_easy")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d$vocab_sumscore_estimate = d$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct
d$vocab_ranking_estimate = d$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did
d %>% select(
vocab_sumscore,
g_easy,
vocab_sumscore_estimate,
vocab_ranking_estimate
) %>% wtd.cors()
## vocab_sumscore g_easy vocab_sumscore_estimate vocab_ranking_estimate
## vocab_sumscore 1.000 0.954 0.678 0.497
## g_easy 0.954 1.000 0.703 0.517
## vocab_sumscore_estimate 0.678 0.703 1.000 0.781
## vocab_ranking_estimate 0.497 0.517 0.781 1.000
GG_scatter(d, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "vocab_sumscore")
## `geom_smooth()` using formula = 'y ~ x'
describeBy(d$vocab_sumscore, d$I_was_a_slow_learner_in_school)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 385 123 22.7 129 126 22.2 32 151 119 -0.94 0.26 1.16
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 56 112 22.4 109 112 24.5 61 149 88 0.05 -0.97 3
describeBy(d$vocab_sumscore, d$I_like_to_read_about_science)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 98 112 22.4 110 113 24.5 55 150 95 -0.17 -0.74 2.27
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 343 124 22.4 131 127 19.3 32 151 119 -1.03 0.48 1.21
describeBy(d$vocab_sumscore, d$A_person_shouldn_t_be_punished_for_breaking_a_law_that_he_thinks_is_unreasonable)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 392 122 22.9 128 125 23.7 32 151 119 -0.79 -0.11 1.16
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 49 117 23.2 121 119 23.7 57 148 91 -0.79 -0.1 3.31
#age-adjust g (residualize on a restricted cubic spline of age), then restandardize
d$g_easy_ageadj = resid(ols(g_easy ~ rcs(age), data = d)) %>% unname()
## number of knots in rcs defaulting to 5
d$g_easy_ageadj_z = standardize(d$g_easy_ageadj, focal_group = d$white_only)
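#standardize(..., focal_group =) presumably anchors the z-scores to the focal group,
#i.e. roughly (x - mean(x[focal])) / sd(x[focal]), so white-only participants get
#mean ~0 and SD ~1 and everyone else is scored on that scale; a hand-rolled sketch:
# (d$g_easy_ageadj - mean(d$g_easy_ageadj[d$white_only])) /
#   sd(d$g_easy_ageadj[d$white_only])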
# Hard items
hard_items = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\follow up 20231029043500-SurveyExport.csv") %>% filter(Status == "Complete")
## New names:
## Rows: 475 Columns: 373
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (360): Status, Language, Referer, SessionID, User Agent, IP Address, Country, City, State/Region, Postal, Write in your ...
## dbl    (7): Response ID, Longitude, Latitude, New Hidden Value...206, New Hidden Value...367, New Hidden Value...372, New Hid...
## lgl    (4): Contact ID, Legacy Comments, Comments, Tags
## dttm   (2): Time Started, Date Submitted
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...23`
## • `recallable:Pick the 2 synonyms` -> `recallable:Pick the 2 synonyms...90`
## • `New Hidden Value` -> `New Hidden Value...206`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong together...279`
## • `sack:Pick 3 words that belong together` -> `sack:Pick 3 words that belong together...283`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that belong together...336`
## • `jaunty:Pick 3 words that belong together` -> `jaunty:Pick 3 words that belong together...361`
## • `New Hidden Value` -> `New Hidden Value...367`
## • `New Hidden Value` -> `New Hidden Value...372`
## • `New Hidden Value` -> `New Hidden Value...373`
hard_items_meta = read_csv("C:\\Users\\mh198\\OneDrive\\Documents\\Data\\Prolific\\A new, open source English vocabulary test\\vocab, mmpi, politics\\data\\follow up prolific_export_649a005bfc9bd0688f8e3304.csv")
## Rows: 465 Columns: 19
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Submission id, Participant id, Status, Completion code, Age, Sex, Ethnicity simplified, Country of birth, Country ...
## dbl (2): Time taken, Total approvals
## dttm (4): Started at, Completed at, Reviewed at, Archived at
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hard_items_var_list = df_var_table(hard_items)
#only completed data, only overlap
hard_items_meta %<>% filter(`Participant id` %in% hard_items$`Write in your Prolific ID:`)
hard_items %<>% filter(`Write in your Prolific ID:` %in% hard_items_meta$`Participant id`)
#subset to participants who passed our 8 attention checks
hard_items <- semi_join( #return all rows from x with a match in y
hard_items,
d,
by = c("Write in your Prolific ID:" = "Participant_id")
)
nrow(hard_items)
## [1] 383
#no dups
assert_that(!anyDuplicated(hard_items$`Write in your Prolific ID:`))
## [1] TRUE
assert_that(!anyDuplicated(hard_items_meta$`Participant id`))
## [1] TRUE
#subset items
hard_items_2of5 = hard_items %>% select(21:205)
hard_items_3of5 = hard_items %>% select(207:366)
hard_items_1of5 = hard_items %>% select(368:371)
#score them
#the first option columns are always the correct ones
#but the CSV export does not show the option order participants actually saw
hard_items_1of5_scored = score_items(hard_items_1of5,
key = c(
"whispering",
"verbiage",
"fragrance",
"sagacious"
)) %>% as_tibble()
hard_items_2of5_scored = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_2of5[, unlist(idx)]
i_cols_NA = i_cols
i_cols_NA[] = !is.na(i_cols_NA)
#score as correct using options 1+2
(i_cols_NA[, 1] & i_cols_NA[, 2]) %>% as.vector() %>% as.numeric()
})
hard_items_3of5_scored = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_3of5[, unlist(idx)]
i_cols_NA = i_cols
i_cols_NA[] = !is.na(i_cols_NA)
#score as correct using options 1+2+3
(i_cols_NA[, 1] & i_cols_NA[, 2] & i_cols_NA[, 3]) %>% as.vector() %>% as.numeric()
})
#combine
hard_items_scored = bind_cols(
hard_items_1of5_scored %>% set_names("1of5_" + seq_along(hard_items_1of5_scored)),
hard_items_2of5_scored %>% set_names("2of5_" + seq_along(hard_items_2of5_scored)),
hard_items_3of5_scored %>% set_names("3of5_" + seq_along(hard_items_3of5_scored))
)
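#a minimal sketch of the scoring rule on a fake 5-column checkbox block: a cell is
#text if the option was ticked and NA otherwise, and the keyed options come first,
#so a 2-of-5 item is correct iff columns 1 and 2 are both non-NA (extra ticks are
#not penalized under this rule)
demo_block = tibble(
  o1 = c("syn1", "syn1", NA),
  o2 = c("syn2", NA, "syn2"),
  o3 = c(NA, "foil", NA),
  o4 = NA_character_,
  o5 = NA_character_
)
as.numeric(!is.na(demo_block$o1) & !is.na(demo_block$o2)) #1 0 0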
#item stats
hard_items_scored_stats = itemstats(hard_items_scored)
hard_items$sumscore = rowSums(hard_items_scored)
hard_items <- hard_items %>% rename(time2 = 372) # total time for hard test
#IRT fit
hard_items_fit = cache_object(filename = "data/hard_items_fit.rds", expr = mirt(
hard_items_scored,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000)
),
renew = renew_all)
## Cache found, reading object from disk
hard_items_fit
##
## Call:
## mirt(data = hard_items_scored, model = 1, itemtype = "2PL", technical = list(NCYCLES = 5000))
##
## Full-information item factor analysis with 1 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 5000 EM iterations.
## mirt version: 1.42
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -14410
## Estimated parameters: 146
## AIC = 29111
## BIC = 29688; SABIC = 29225
## G2 (1e+10) = 24263, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
hard_items_fit %>% summary()
## F1 h2
## 1of5_1 0.4828 0.23313
## 1of5_2 0.3170 0.10048
## 1of5_3 0.3975 0.15803
## 1of5_4 0.3409 0.11624
## 2of5_1 0.4744 0.22505
## 2of5_2 0.5564 0.30955
## 2of5_3 0.5791 0.33535
## 2of5_4 0.6299 0.39681
## 2of5_5 0.5014 0.25143
## 2of5_6 0.7330 0.53727
## 2of5_7 0.3625 0.13142
## 2of5_8 0.6099 0.37193
## 2of5_9 0.5720 0.32723
## 2of5_10 0.3786 0.14337
## 2of5_11 0.3674 0.13496
## 2of5_12 0.4459 0.19881
## 2of5_13 0.4915 0.24157
## 2of5_14 0.4411 0.19455
## 2of5_15 0.6868 0.47171
## 2of5_16 0.5960 0.35516
## 2of5_17 0.6897 0.47566
## 2of5_18 0.7421 0.55064
## 2of5_19 0.4780 0.22844
## 2of5_20 0.4095 0.16772
## 2of5_21 0.5408 0.29244
## 2of5_22 0.9027 0.81483
## 2of5_23 0.3318 0.11010
## 2of5_24 0.2422 0.05868
## 2of5_25 0.7235 0.52343
## 2of5_26 0.7245 0.52491
## 2of5_27 0.6160 0.37941
## 2of5_28 0.2964 0.08783
## 2of5_29 0.5670 0.32147
## 2of5_30 0.5707 0.32570
## 2of5_31 0.5459 0.29801
## 2of5_32 0.6786 0.46052
## 2of5_33 0.3562 0.12691
## 2of5_34 0.4856 0.23577
## 2of5_35 0.3152 0.09936
## 2of5_36 0.8505 0.72336
## 2of5_37 0.9777 0.95591
## 3of5_1 0.2567 0.06588
## 3of5_2 0.5414 0.29309
## 3of5_3 0.5198 0.27016
## 3of5_4 0.7462 0.55682
## 3of5_5 0.3335 0.11122
## 3of5_6 0.7531 0.56723
## 3of5_7 0.6111 0.37350
## 3of5_8 0.4374 0.19131
## 3of5_9 0.2678 0.07171
## 3of5_10 0.8271 0.68412
## 3of5_11 0.7382 0.54499
## 3of5_12 0.5714 0.32652
## 3of5_13 0.3597 0.12940
## 3of5_14 0.4754 0.22599
## 3of5_15 -0.0516 0.00266
## 3of5_16 0.3488 0.12169
## 3of5_17 0.4429 0.19618
## 3of5_18 0.3327 0.11070
## 3of5_19 -0.2561 0.06558
## 3of5_20 0.5689 0.32367
## 3of5_21 0.4006 0.16047
## 3of5_22 0.4663 0.21744
## 3of5_23 0.7927 0.62835
## 3of5_24 0.5444 0.29633
## 3of5_25 0.5522 0.30494
## 3of5_26 0.7385 0.54540
## 3of5_27 0.6120 0.37451
## 3of5_28 0.3782 0.14307
## 3of5_29 0.4970 0.24700
## 3of5_30 0.5716 0.32670
## 3of5_31 0.4063 0.16505
## 3of5_32 0.5819 0.33863
##
## SS loadings: 22
## Proportion Var: 0.301
##
## Factor correlations:
##
## F1
## F1 1
hard_items_scored_stats$itemstats$g_loading = hard_items_fit %>% summary() %>% .$rotF %>% as.vector()
hard_items_scored_stats$itemstats
## N mean sd total.r total.r_if_rm alpha_if_rm g_loading
## 1of5_1 383 0.305 0.461 0.417 0.387 0.927 0.483
## 1of5_2 383 0.303 0.460 0.274 0.241 0.928 0.317
## 1of5_3 383 0.245 0.431 0.323 0.294 0.928 0.398
## 1of5_4 383 0.431 0.496 0.319 0.285 0.928 0.341
## 2of5_1 383 0.475 0.500 0.418 0.387 0.927 0.474
## 2of5_2 383 0.269 0.444 0.451 0.424 0.927 0.556
## 2of5_3 383 0.465 0.499 0.483 0.454 0.927 0.579
## 2of5_4 383 0.266 0.443 0.502 0.477 0.927 0.630
## 2of5_5 383 0.554 0.498 0.433 0.402 0.927 0.501
## 2of5_6 383 0.713 0.453 0.534 0.509 0.927 0.733
## 2of5_7 383 0.313 0.464 0.323 0.291 0.928 0.363
## 2of5_8 383 0.245 0.431 0.488 0.463 0.927 0.610
## 2of5_9 383 0.436 0.497 0.499 0.470 0.927 0.572
## 2of5_10 383 0.423 0.495 0.339 0.305 0.928 0.379
## 2of5_11 383 0.527 0.500 0.334 0.300 0.928 0.367
## 2of5_12 383 0.232 0.423 0.354 0.326 0.928 0.446
## 2of5_13 383 0.326 0.469 0.419 0.389 0.927 0.492
## 2of5_14 383 0.136 0.343 0.298 0.275 0.928 0.441
## 2of5_15 383 0.721 0.449 0.491 0.465 0.927 0.687
## 2of5_16 383 0.493 0.501 0.506 0.477 0.927 0.596
## 2of5_17 383 0.569 0.496 0.566 0.540 0.926 0.690
## 2of5_18 383 0.402 0.491 0.625 0.601 0.926 0.742
## 2of5_19 383 0.522 0.500 0.413 0.381 0.927 0.478
## 2of5_20 383 0.559 0.497 0.363 0.330 0.928 0.410
## 2of5_21 383 0.266 0.443 0.448 0.421 0.927 0.541
## 2of5_22 383 0.567 0.496 0.723 0.705 0.925 0.903
## 2of5_23 383 0.282 0.451 0.298 0.266 0.928 0.332
## 2of5_24 383 0.238 0.426 0.199 0.168 0.929 0.242
## 2of5_25 383 0.543 0.499 0.590 0.565 0.926 0.723
## 2of5_26 383 0.467 0.500 0.618 0.594 0.926 0.725
## 2of5_27 383 0.256 0.437 0.494 0.469 0.927 0.616
## 2of5_28 383 0.554 0.498 0.273 0.238 0.928 0.296
## 2of5_29 383 0.420 0.494 0.496 0.467 0.927 0.567
## 2of5_30 383 0.580 0.494 0.478 0.448 0.927 0.571
## 2of5_31 383 0.298 0.458 0.446 0.418 0.927 0.546
## 2of5_32 383 0.117 0.322 0.424 0.404 0.927 0.679
## 2of5_33 383 0.272 0.445 0.298 0.267 0.928 0.356
## 2of5_34 383 0.509 0.501 0.431 0.400 0.927 0.486
## 2of5_35 383 0.433 0.496 0.286 0.251 0.928 0.315
## 2of5_36 383 0.990 0.102 0.140 0.132 0.928 0.851
## 2of5_37 383 0.997 0.051 0.096 0.092 0.929 0.978
## 3of5_1 383 0.407 0.492 0.257 0.222 0.928 0.257
## 3of5_2 383 0.157 0.364 0.378 0.354 0.928 0.541
## 3of5_3 383 0.185 0.389 0.382 0.357 0.928 0.520
## 3of5_4 383 0.480 0.500 0.625 0.601 0.926 0.746
## 3of5_5 383 0.394 0.489 0.311 0.278 0.928 0.333
## 3of5_6 383 0.475 0.500 0.616 0.592 0.926 0.753
## 3of5_7 383 0.230 0.421 0.472 0.447 0.927 0.611
## 3of5_8 383 0.099 0.299 0.269 0.248 0.928 0.437
## 3of5_9 383 0.264 0.441 0.231 0.199 0.928 0.268
## 3of5_10 383 0.794 0.405 0.511 0.488 0.927 0.827
## 3of5_11 383 0.311 0.463 0.605 0.583 0.926 0.738
## 3of5_12 383 0.499 0.501 0.489 0.460 0.927 0.571
## 3of5_13 383 0.180 0.385 0.263 0.236 0.928 0.360
## 3of5_14 383 0.587 0.493 0.391 0.359 0.928 0.475
## 3of5_15 383 0.285 0.452 -0.010 -0.044 0.930 -0.052
## 3of5_16 383 0.467 0.500 0.313 0.279 0.928 0.349
## 3of5_17 383 0.368 0.483 0.385 0.354 0.928 0.443
## 3of5_18 383 0.428 0.495 0.296 0.261 0.928 0.333
## 3of5_19 383 0.078 0.269 -0.083 -0.103 0.929 -0.256
## 3of5_20 383 0.339 0.474 0.487 0.459 0.927 0.569
## 3of5_21 383 0.601 0.490 0.338 0.304 0.928 0.401
## 3of5_22 383 0.540 0.499 0.394 0.362 0.928 0.466
## 3of5_23 383 0.898 0.303 0.370 0.350 0.928 0.793
## 3of5_24 383 0.637 0.481 0.441 0.411 0.927 0.544
## 3of5_25 383 0.379 0.486 0.473 0.444 0.927 0.552
## 3of5_26 383 0.373 0.484 0.622 0.599 0.926 0.739
## 3of5_27 383 0.178 0.383 0.438 0.414 0.927 0.612
## 3of5_28 383 0.815 0.389 0.247 0.219 0.928 0.378
## 3of5_29 383 0.825 0.380 0.293 0.267 0.928 0.497
## 3of5_30 383 0.433 0.496 0.490 0.461 0.927 0.572
## 3of5_31 383 0.702 0.458 0.316 0.285 0.928 0.406
## 3of5_32 383 0.705 0.457 0.432 0.403 0.927 0.582
hard_items_scored_stats$itemstats %>% describe2()
## # A tibble: 7 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N 73 383 383 0 0 383 383 NaN NaN
## 2 mean 73 0.436 0.428 0.206 0.205 0.0783 0.997 0.651 0.114
## 3 sd 73 0.445 0.469 0.0826 0.0411 0.0511 0.501 -2.74 8.87
## 4 total.r 73 0.397 0.417 0.143 0.127 -0.0834 0.723 -0.604 1.15
## 5 total.r_if_rm 73 0.369 0.387 0.144 0.134 -0.103 0.705 -0.520 1.03
## 6 alpha_if_rm 73 0.927 0.927 0.000813 0.000801 0.925 0.930 -0.00918 0.342
## 7 g_loading 73 0.512 0.520 0.199 0.177 -0.256 0.978 -0.684 2.27
#difficulties
hard_items_scored_stats$itemstats$mean %>% GG_denhist()
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
#loadings
hard_items_scored_stats$itemstats$g_loading %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
plot(hard_items_fit, type = "rxx")
hard_items_fit_scores = fscores(hard_items_fit, full.scores.SE = T)
empirical_rxx(hard_items_fit_scores)
## F1
## 0.938
#time spent vs. score
# hard_items_scored$time_spent_mins = (hard_items$`Date Submitted`-hard_items$`Time Started`)/60
#exact choices table
hard_items_2of5_choices = map_df(seq_along(hard_items_2of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_2of5[, unlist(idx)]
#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick2of5_" + 1:ncol(hard_items_2of5_scored))
hard_items_3of5_choices = map_df(seq_along(hard_items_3of5) %>% split_every_k(k = 5), function(idx) {
# browser()
#subset cols
i_cols = hard_items_3of5[, unlist(idx)]
#string collapse across columns
apply(i_cols, 1, function(x) {
str_c(na.omit(x), collapse = ", ")
})
}) %>% set_colnames("pick3of5_" + 1:ncol(hard_items_3of5_scored))
# Full test
#merge data
all_items_scored = left_join(
easy_items_scored %>% mutate(id = d$Participant_id),
hard_items_scored %>% mutate(id = hard_items$`Write in your Prolific ID:`),
by = "id"
)
#no dups
assert_that(!anyDuplicated(all_items_scored$id))
## [1] TRUE
d1 = left_join(
all_items_scored,
hard_items %>% mutate(id = hard_items$`Write in your Prolific ID:`),
by = "id"
)
d$time <- d$New_Hidden_Value_197 # time since the survey taker started the current page
dk <- d1 %>% left_join(d %>% select(Participant_id, age, time), by = c("id" = "Participant_id"))
start_col <- which(names(dk) == "a_specific_number")
end_col <- which(names(dk) == "weaponry")
column_names <- names(dk)[start_col:end_col]
dk <- dk %>% mutate(easy_test = rowSums(select(., all_of(column_names)), na.rm = TRUE))
start_col <- which(names(dk) == "1of5_1")
end_col <- which(names(dk) == "3of5_32")
column_names <- names(dk)[start_col:end_col]
dk <- dk %>% mutate(hard_test = rowSums(select(., all_of(column_names)), na.rm = TRUE))
dk <- dk %>% mutate(hard_missing = case_when(hard_test > 0 ~ 1, hard_test == 0 ~ 0)) # despite the name, 1 = has hard-test data, 0 = missing
print(describeBy(dk$easy_test, dk$hard_missing))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 58 112 24.5 114 113 31.1 64 150 86 -0.16 -1.25 3.22
## ------------------------------------------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 383 123 22.4 129 126 22.2 32 151 119 -0.9 0.27 1.15
na_count <- sum(is.na(dk$sumscore))
print(na_count)
## [1] 58
print(describeBy(d$Total_approvals))
## Warning in describeBy(d$Total_approvals): no grouping variable requested
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 441 1494 1278 1177 1321 1173 4 6633 6629 1.18 1.12 60.8
cor(dk$age, dk$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.347
dk0 <- dk %>% filter(hard_test > 0)
nrow(dk0)
## [1] 383
ggplot(dk0, aes(x = easy_test, y = hard_test)) +
geom_point() +
stat_cor(method = "pearson") + # displays correlation in the plot
labs(title = "Scatter Plot of Easy Test vs Hard Test",
x = "Easy Test",
y = "Hard Test")
dk0$age <- as.numeric(dk0$age)
ggplot(dk0, aes(x = age, y = hard_test)) +
geom_point() +
stat_cor(method = "pearson") + # displays correlation in the plot, but only if age is numeric
labs(title = "Hard test versus Age",
x = "age",
y = "Hard Test")
cor(dk0$easy_test, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.814
cor(dk0$age, dk0$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.334
cor(dk0$age, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] 0.325
cor(dk0$time, dk0$easy_test, use="pairwise.complete.obs", method="pearson")
## [1] -0.143
cor(dk0$time2, dk0$hard_test, use="pairwise.complete.obs", method="pearson")
## [1] -0.0248
dk0 <- dk0 %>% mutate(aged = case_when(
  age >= 70 ~ 1,
  age <= 69 ~ 0))
print(describeBy(dk0$time, dk0$aged))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 358 1437 745 1222 1316 480 501 5024 4523 1.92 4.57 39.4
## ------------------------------------------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 1199 375 1114 1153 249 779 2363 1584 1.31 1.52 75
print(describeBy(dk0$time2, dk0$aged))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 358 1406 2204 1117 1165 481 363 35564 35201 12.6 178 116
## ------------------------------------------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 1270 421 1229 1237 535 670 2220 1550 0.59 -0.55 84.2
print(describeBy(dk0$easy_test, dk0$aged))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 358 122 22.5 128 125 24.5 32 151 119 -0.85 0.18 1.19
## ------------------------------------------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 134 18.4 140 137 8.9 68 151 83 -1.93 4 3.69
print(describeBy(dk0$hard_test, dk0$aged))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 358 31.3 13.2 29 30.7 14.8 7 66 59 0.37 -0.83 0.7
## ------------------------------------------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 25 39.6 13.4 41 39.8 13.3 15 65 50 -0.2 -0.97 2.69
d$time <- d$New_Hidden_Value_197 # time since the survey taker started the current page
ggplot(d, aes(x = time, y = age)) +
geom_point() +
stat_cor(method = "pearson") + # displays correlation in the plot, but only if age is numeric
labs(title = "Test Time versus Age",
x = "Test Time",
y = "age")
#fit the full dataset to the same model
#slow to converge at default settings
#(did not converge even after 20k iterations)
all_items_fit = cache_object(filename = "data/all_items_fit.rds", expr = mirt(
all_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
#item stats
all_items_stats = itemstats(all_items_scored %>% select(-id))
all_items_fit
##
## Call:
## mirt(data = all_items_scored %>% select(-id), model = 1, itemtype = "2PL",
## technical = list(NCYCLES = 2000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 322 EM iterations.
## mirt version: 1.42
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -37382
## Estimated parameters: 448
## AIC = 75660
## BIC = 77492; SABIC = 76070
all_items_fit %>% summary()
## F1 h2
## a_specific_number 0.4560 0.20789
## a_type_of_brass_instrument 0.8771 0.76923
## a_type_of_drapery 0.6508 0.42360
## a_type_of_fabric_101 0.7424 0.55123
## a_type_of_fabric_98 0.6988 0.48836
## a_type_of_grasshopper 0.8023 0.64370
## a_type_of_hat 0.5737 0.32917
## a_type_of_heating_unit 0.4979 0.24789
## a_type_of_magistrate_position 0.7441 0.55364
## a_type_of_mathematical_operation 0.5695 0.32429
## a_type_of_overshoe 0.4956 0.24566
## a_type_of_plant 0.7337 0.53827
## a_type_of_religious_teacher 0.7790 0.60687
## a_type_of_rock 0.3189 0.10172
## a_type_of_skirts 0.6417 0.41179
## a_type_of_smokeless_powder 0.4664 0.21751
## a_type_of_sound 0.4894 0.23953
## a_type_of_sword 0.3558 0.12657
## a_type_of_volcanic_crater 0.6664 0.44412
## a_type_of_wavy_form 0.7062 0.49872
## actionable_negligence 0.7682 0.59008
## advisory 0.7231 0.52283
## advocate 0.7009 0.49129
## almighty 0.9138 0.83505
## amazement 0.6603 0.43605
## amenability 0.7384 0.54527
## auspices 0.6870 0.47199
## barely_able_to_read_and_write 0.8397 0.70510
## because 0.6806 0.46319
## berate 0.8359 0.69873
## blade 0.7819 0.61141
## blend 0.6814 0.46427
## blunder 0.8589 0.73774
## bow 0.7508 0.56370
## brief 0.4324 0.18699
## bring_about 0.7888 0.62228
## carelessly_or_hastily_put_together 0.6081 0.36974
## celebration 0.7185 0.51622
## cheat 0.8051 0.64823
## clay_pigeon_shooting 0.7660 0.58677
## coarse 0.7755 0.60136
## collect_or_study_insects 0.6253 0.39096
## colossal 0.7116 0.50634
## commotion 0.9020 0.81358
## complainer 0.7944 0.63115
## confiscate 0.9086 0.82559
## congratulate 0.5907 0.34891
## contemplate 0.4685 0.21948
## convoy 0.8540 0.72929
## couch 0.6500 0.42252
## crease 0.7622 0.58099
## cunning 0.6379 0.40695
## deceit 0.5741 0.32964
## deliberately 0.7601 0.57774
## deprive 0.6354 0.40377
## detectable 0.3379 0.11416
## diatribes 0.5787 0.33491
## disjoined 0.3118 0.09721
## disrupt 0.7905 0.62488
## distinct 0.2755 0.07590
## divergence 0.7954 0.63259
## dome 0.5515 0.30420
## downfall 0.7742 0.59934
## drink 0.8507 0.72361
## elite 0.6987 0.48813
## embarrassment 0.6018 0.36217
## emphasize 0.8795 0.77355
## empire 0.5053 0.25538
## empty 0.7586 0.57543
## entanglement 0.6907 0.47709
## environment 0.6582 0.43321
## excite 0.7748 0.60036
## flammable 0.9196 0.84567
## flatterer 0.8670 0.75177
## flatteries 0.6458 0.41708
## forever 0.7978 0.63656
## frequent 0.0918 0.00843
## gigantic 0.7624 0.58127
## girl 0.7792 0.60716
## goo 0.7159 0.51247
## goodbye 0.8195 0.67163
## greed 0.8404 0.70630
## groan 0.8507 0.72368
## gruesome 0.6646 0.44166
## guarantee 0.2832 0.08018
## gutter 0.6566 0.43107
## harmfulness 0.5228 0.27333
## hobby 0.8052 0.64835
## hut 0.8105 0.65694
## illness 0.4915 0.24155
## impromptu 0.6602 0.43582
## indescribable 0.7615 0.57986
## intellectual 0.7103 0.50459
## jargon 0.9257 0.85691
## knowledgeable 0.8164 0.66652
## lackadaisical 0.6056 0.36677
## manager 0.8667 0.75116
## meal 0.7912 0.62601
## melodic 0.7282 0.53030
## mutually 0.8726 0.76141
## nonsense 0.8601 0.73980
## nonsensical 0.8710 0.75860
## not_coveted 0.6638 0.44058
## pamper 0.5901 0.34827
## penitentiary 0.5260 0.27671
## perplexing 0.6897 0.47566
## persistence 0.6367 0.40544
## predetermine 0.5420 0.29378
## pretender 0.8563 0.73318
## questioning 0.6609 0.43685
## quickly 0.6566 0.43113
## rebellious 0.8024 0.64392
## referee 0.7714 0.59502
## referendum 0.5253 0.27596
## relating_to_the_right 0.3501 0.12259
## relevant 0.6983 0.48765
## respectful 0.7700 0.59296
## retailer 0.6003 0.36041
## retract 0.5233 0.27385
## ropes 0.7983 0.63724
## sag 0.5958 0.35496
## schemer 0.6689 0.44738
## seize 0.7927 0.62842
## sensitivity 0.7169 0.51397
## shadows 0.6587 0.43386
## sketch 0.6537 0.42734
## slang 0.8019 0.64312
## slave 0.4811 0.23146
## sluggish 0.6341 0.40204
## somber 0.6054 0.36651
## spinelessness 0.6318 0.39922
## sporadic 0.8308 0.69028
## squad 0.9747 0.95006
## stagger 0.6521 0.42519
## stinking 0.8760 0.76741
## stroll 0.8714 0.75940
## stubborn_100 0.5511 0.30372
## stubborn_143 0.6541 0.42788
## stylish 0.6914 0.47802
## summit 0.7555 0.57083
## terminology 0.8288 0.68694
## the_science_of_speech_sounds 0.7507 0.56350
## transportation 0.2980 0.08879
## tyrant 0.4395 0.19317
## unhealthful 0.4189 0.17545
## vile 0.8810 0.77609
## vulgar 0.7191 0.51711
## wandering 0.4151 0.17229
## warning 0.6374 0.40624
## wave 0.7200 0.51835
## weaponry 0.6755 0.45632
## 1of5_1 0.4402 0.19375
## 1of5_2 0.2892 0.08363
## 1of5_3 0.3770 0.14216
## 1of5_4 0.3109 0.09664
## 2of5_1 0.4115 0.16937
## 2of5_2 0.5367 0.28809
## 2of5_3 0.5814 0.33800
## 2of5_4 0.6195 0.38372
## 2of5_5 0.5203 0.27073
## 2of5_6 0.7351 0.54043
## 2of5_7 0.3749 0.14052
## 2of5_8 0.5864 0.34383
## 2of5_9 0.5549 0.30788
## 2of5_10 0.3782 0.14305
## 2of5_11 0.3707 0.13745
## 2of5_12 0.4195 0.17600
## 2of5_13 0.4912 0.24124
## 2of5_14 0.4024 0.16195
## 2of5_15 0.6763 0.45745
## 2of5_16 0.5748 0.33038
## 2of5_17 0.6795 0.46169
## 2of5_18 0.7350 0.54027
## 2of5_19 0.4470 0.19980
## 2of5_20 0.3849 0.14816
## 2of5_21 0.5165 0.26682
## 2of5_22 0.8802 0.77482
## 2of5_23 0.3123 0.09753
## 2of5_24 0.2116 0.04476
## 2of5_25 0.7345 0.53949
## 2of5_26 0.7117 0.50650
## 2of5_27 0.5825 0.33932
## 2of5_28 0.2751 0.07568
## 2of5_29 0.5487 0.30107
## 2of5_30 0.5515 0.30421
## 2of5_31 0.5391 0.29058
## 2of5_32 0.6284 0.39494
## 2of5_33 0.3334 0.11117
## 2of5_34 0.4533 0.20551
## 2of5_35 0.2949 0.08697
## 2of5_36 0.7625 0.58146
## 2of5_37 0.8562 0.73303
## 3of5_1 0.2726 0.07430
## 3of5_2 0.4951 0.24512
## 3of5_3 0.4760 0.22656
## 3of5_4 0.7072 0.50017
## 3of5_5 0.3302 0.10900
## 3of5_6 0.7498 0.56227
## 3of5_7 0.5949 0.35395
## 3of5_8 0.4098 0.16792
## 3of5_9 0.2478 0.06142
## 3of5_10 0.8494 0.72152
## 3of5_11 0.7229 0.52262
## 3of5_12 0.5459 0.29798
## 3of5_13 0.3646 0.13292
## 3of5_14 0.5077 0.25774
## 3of5_15 -0.0448 0.00201
## 3of5_16 0.3256 0.10603
## 3of5_17 0.3855 0.14863
## 3of5_18 0.3197 0.10218
## 3of5_19 -0.2267 0.05139
## 3of5_20 0.5537 0.30654
## 3of5_21 0.4213 0.17750
## 3of5_22 0.4441 0.19724
## 3of5_23 0.8596 0.73893
## 3of5_24 0.5497 0.30220
## 3of5_25 0.5256 0.27624
## 3of5_26 0.7102 0.50445
## 3of5_27 0.5969 0.35632
## 3of5_28 0.4133 0.17084
## 3of5_29 0.5406 0.29221
## 3of5_30 0.5113 0.26144
## 3of5_31 0.4263 0.18169
## 3of5_32 0.5758 0.33151
##
## SS loadings: 94.4
## Proportion Var: 0.421
##
## Factor correlations:
##
## F1
## F1 1
all_items_stats$itemstats$g_loading = all_items_fit %>% summary() %>% .$rotF %>% as.vector()
#extract the 2PL slopes and negated intercepts from the fit
all_items_stats$itemstats$discrim = coef(all_items_fit, simplify = T)$items[, 1] %>% unname()
all_items_stats$itemstats$difficulty = -coef(all_items_fit, simplify = T)$items[, 2] %>% unname()
all_items_stats$itemstats
## N mean sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## a_specific_number 441 0.841 0.366 0.259 0.249 0.974 0.456 0.872 -1.900
## a_type_of_brass_instrument 441 0.932 0.252 0.386 0.380 0.974 0.877 3.107 -4.937
## a_type_of_drapery 441 0.823 0.382 0.389 0.380 0.974 0.651 1.459 -2.092
## a_type_of_fabric_101 441 0.934 0.248 0.320 0.314 0.974 0.742 1.886 -3.734
## a_type_of_fabric_98 441 0.755 0.431 0.480 0.470 0.974 0.699 1.663 -1.702
## a_type_of_grasshopper 441 0.821 0.384 0.511 0.503 0.974 0.802 2.288 -2.674
## a_type_of_hat 441 0.356 0.479 0.467 0.456 0.974 0.574 1.192 0.725
## a_type_of_heating_unit 441 0.871 0.336 0.248 0.240 0.974 0.498 0.977 -2.213
## a_type_of_magistrate_position 441 0.898 0.303 0.356 0.349 0.974 0.744 1.896 -3.187
## a_type_of_mathematical_operation 441 0.921 0.271 0.262 0.255 0.974 0.569 1.179 -2.926
## a_type_of_overshoe 441 0.635 0.482 0.396 0.384 0.974 0.496 0.971 -0.689
## a_type_of_plant 441 0.862 0.346 0.422 0.414 0.974 0.734 1.838 -2.721
## a_type_of_religious_teacher 441 0.855 0.353 0.463 0.455 0.974 0.779 2.115 -2.874
## a_type_of_rock 441 0.948 0.223 0.099 0.094 0.974 0.319 0.573 -3.034
## a_type_of_skirts 441 0.828 0.378 0.386 0.377 0.974 0.642 1.424 -2.107
## a_type_of_smokeless_powder 441 0.753 0.432 0.299 0.288 0.974 0.466 0.897 -1.305
## a_type_of_sound 441 0.680 0.467 0.363 0.351 0.974 0.489 0.955 -0.919
## a_type_of_sword 441 0.599 0.491 0.259 0.245 0.974 0.356 0.648 -0.448
## a_type_of_volcanic_crater 441 0.855 0.353 0.379 0.371 0.974 0.666 1.521 -2.415
## a_type_of_wavy_form 441 0.615 0.487 0.551 0.541 0.974 0.706 1.698 -0.803
## actionable_negligence 441 0.841 0.366 0.446 0.438 0.974 0.768 2.042 -2.677
## advisory 441 0.959 0.198 0.243 0.238 0.974 0.723 1.782 -4.191
## advocate 441 0.751 0.433 0.500 0.491 0.974 0.701 1.673 -1.676
## almighty 441 0.961 0.193 0.350 0.346 0.974 0.914 3.830 -6.556
## amazement 441 0.918 0.274 0.305 0.298 0.974 0.660 1.497 -3.134
## amenability 441 0.832 0.374 0.420 0.412 0.974 0.738 1.864 -2.454
## auspices 441 0.494 0.501 0.559 0.548 0.974 0.687 1.609 -0.059
## barely_able_to_read_and_write 441 0.880 0.326 0.471 0.464 0.974 0.840 2.632 -3.619
## because 441 0.912 0.284 0.284 0.277 0.974 0.681 1.581 -3.105
## berate 441 0.857 0.350 0.450 0.442 0.974 0.836 2.592 -3.308
## blade 441 0.939 0.240 0.342 0.336 0.974 0.782 2.135 -4.059
## blend 441 0.912 0.284 0.327 0.320 0.974 0.681 1.584 -3.108
## blunder 441 0.961 0.193 0.229 0.225 0.974 0.859 2.855 -5.396
## bow 441 0.596 0.491 0.596 0.586 0.974 0.751 1.935 -0.768
## brief 441 0.571 0.495 0.431 0.419 0.974 0.432 0.816 -0.349
## bring_about 441 0.868 0.338 0.438 0.430 0.974 0.789 2.185 -3.081
## carelessly_or_hastily_put_together 441 0.866 0.341 0.312 0.304 0.974 0.608 1.304 -2.373
## celebration 441 0.757 0.429 0.478 0.468 0.974 0.718 1.758 -1.775
## cheat 441 0.723 0.448 0.574 0.566 0.974 0.805 2.310 -1.855
## clay_pigeon_shooting 441 0.943 0.232 0.293 0.287 0.974 0.766 2.028 -4.048
## coarse 441 0.941 0.236 0.340 0.334 0.974 0.775 2.090 -4.062
## collect_or_study_insects 441 0.932 0.252 0.273 0.266 0.974 0.625 1.364 -3.243
## colossal 441 0.823 0.382 0.417 0.408 0.974 0.712 1.724 -2.272
## commotion 441 0.853 0.355 0.562 0.555 0.974 0.902 3.556 -4.138
## complainer 441 0.839 0.368 0.482 0.474 0.974 0.794 2.226 -2.802
## confiscate 441 0.896 0.306 0.484 0.477 0.974 0.909 3.703 -4.915
## congratulate 441 0.887 0.317 0.294 0.285 0.974 0.591 1.246 -2.544
## contemplate 441 0.902 0.297 0.242 0.234 0.974 0.468 0.903 -2.509
## convoy 441 0.764 0.425 0.592 0.584 0.974 0.854 2.794 -2.529
## couch 441 0.757 0.429 0.434 0.424 0.974 0.650 1.456 -1.601
## crease 441 0.830 0.376 0.471 0.463 0.974 0.762 2.004 -2.540
## cunning 441 0.796 0.403 0.369 0.359 0.974 0.638 1.410 -1.849
## deceit 441 0.746 0.436 0.390 0.379 0.974 0.574 1.194 -1.392
## deliberately 441 0.812 0.391 0.479 0.471 0.974 0.760 1.991 -2.366
## deprive 441 0.837 0.370 0.384 0.375 0.974 0.635 1.401 -2.168
## detectable 441 0.966 0.181 0.103 0.097 0.974 0.338 0.611 -3.503
## diatribes 441 0.624 0.485 0.454 0.443 0.974 0.579 1.208 -0.696
## disjoined 441 0.494 0.501 0.272 0.259 0.974 0.312 0.558 0.017
## disrupt 441 0.959 0.198 0.275 0.270 0.974 0.790 2.197 -4.606
## distinct 441 0.857 0.350 0.186 0.176 0.974 0.275 0.488 -1.872
## divergence 441 0.698 0.459 0.590 0.582 0.974 0.795 2.233 -1.618
## dome 441 0.744 0.437 0.394 0.383 0.974 0.552 1.125 -1.347
## downfall 441 0.898 0.303 0.407 0.400 0.974 0.774 2.082 -3.351
## drink 441 0.769 0.422 0.558 0.549 0.974 0.851 2.754 -2.542
## elite 441 0.853 0.355 0.385 0.376 0.974 0.699 1.662 -2.496
## embarrassment 441 0.875 0.331 0.314 0.305 0.974 0.602 1.283 -2.449
## emphasize 441 0.966 0.181 0.285 0.280 0.974 0.880 3.146 -5.890
## empire 441 0.816 0.388 0.314 0.305 0.974 0.505 0.997 -1.770
## empty 441 0.848 0.359 0.448 0.440 0.974 0.759 1.981 -2.697
## entanglement 441 0.542 0.499 0.538 0.527 0.974 0.691 1.626 -0.345
## environment 441 0.503 0.501 0.541 0.531 0.974 0.658 1.488 -0.098
## excite 441 0.707 0.455 0.581 0.572 0.974 0.775 2.086 -1.603
## flammable 441 0.986 0.116 0.224 0.221 0.974 0.920 3.984 -8.083
## flatterer 441 0.844 0.364 0.559 0.551 0.974 0.867 2.962 -3.480
## flatteries 441 0.732 0.443 0.481 0.471 0.974 0.646 1.440 -1.426
## forever 441 0.864 0.343 0.455 0.447 0.974 0.798 2.253 -3.088
## frequent 441 0.977 0.149 0.001 -0.003 0.974 0.092 0.157 -3.775
## gigantic 441 0.952 0.213 0.243 0.237 0.974 0.762 2.005 -4.232
## girl 441 0.875 0.331 0.418 0.410 0.974 0.779 2.116 -3.100
## goo 441 0.971 0.169 0.221 0.216 0.974 0.716 1.745 -4.515
## goodbye 441 0.948 0.223 0.326 0.321 0.974 0.820 2.434 -4.562
## greed 441 0.649 0.478 0.668 0.660 0.974 0.840 2.639 -1.441
## groan 441 0.912 0.284 0.391 0.384 0.974 0.851 2.754 -4.195
## gruesome 441 0.834 0.372 0.391 0.382 0.974 0.665 1.514 -2.224
## guarantee 441 0.930 0.256 0.105 0.098 0.974 0.283 0.503 -2.683
## gutter 441 0.941 0.236 0.225 0.219 0.974 0.657 1.481 -3.503
## harmfulness 441 0.868 0.338 0.313 0.304 0.974 0.523 1.044 -2.230
## hobby 441 0.880 0.326 0.443 0.436 0.974 0.805 2.311 -3.325
## hut 441 0.889 0.315 0.454 0.447 0.974 0.811 2.355 -3.480
## illness 441 0.717 0.451 0.333 0.322 0.974 0.491 0.961 -1.119
## impromptu 441 0.626 0.484 0.514 0.503 0.974 0.660 1.496 -0.801
## indescribable 441 0.955 0.208 0.292 0.287 0.974 0.761 2.000 -4.283
## intellectual 441 0.943 0.232 0.294 0.288 0.974 0.710 1.718 -3.755
## jargon 441 0.959 0.198 0.345 0.340 0.974 0.926 4.165 -6.891
## knowledgeable 441 0.823 0.382 0.512 0.503 0.974 0.816 2.406 -2.790
## lackadaisical 441 0.741 0.438 0.416 0.406 0.974 0.606 1.295 -1.412
## manager 441 0.986 0.116 0.191 0.188 0.974 0.867 2.957 -6.731
## meal 441 0.610 0.488 0.625 0.616 0.974 0.791 2.202 -0.957
## melodic 441 0.873 0.333 0.422 0.414 0.974 0.728 1.808 -2.818
## mutually 441 0.966 0.181 0.309 0.304 0.974 0.873 3.040 -5.765
## nonsense 441 0.873 0.333 0.494 0.486 0.974 0.860 2.870 -3.755
## nonsensical 441 0.973 0.163 0.305 0.300 0.974 0.871 3.017 -6.007
## not_coveted 441 0.853 0.355 0.370 0.361 0.974 0.664 1.510 -2.386
## pamper 441 0.467 0.499 0.453 0.442 0.974 0.590 1.244 0.121
## penitentiary 441 0.918 0.274 0.240 0.232 0.974 0.526 1.053 -2.807
## perplexing 441 0.905 0.294 0.375 0.368 0.974 0.690 1.621 -3.045
## persistence 441 0.862 0.346 0.350 0.341 0.974 0.637 1.405 -2.398
## predetermine 441 0.916 0.278 0.255 0.248 0.974 0.542 1.098 -2.805
## pretender 441 0.991 0.095 0.140 0.137 0.974 0.856 2.821 -7.004
## questioning 441 0.900 0.300 0.343 0.335 0.974 0.661 1.499 -2.889
## quickly 441 0.789 0.408 0.402 0.392 0.974 0.657 1.482 -1.841
## [ reached 'max' / getOption("max.print") -- omitted 113 rows ]
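#note: mirt's 2PL is parameterized as P = logistic(a*theta + d), so the difficulty column above is
#the negated intercept (-d), not the classical b = -d/a. A minimal sketch of that conversion, in
#case the classical metric is wanted (the difficulty_b column is illustrative and not used below):
all_items_coefs = coef(all_items_fit, simplify = T)$items
all_items_stats$itemstats$difficulty_b = (-all_items_coefs[, 2] / all_items_coefs[, 1]) %>% unname()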
all_items_stats$itemstats %>%
describe2()
## # A tibble: 9 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N 224 422. 441 27.2 0 383 441 -0.738 -1.46
## 2 mean 224 0.685 0.754 0.239 0.234 0.0783 0.997 -0.695 -0.676
## 3 sd 224 0.385 0.407 0.105 0.110 0.0511 0.501 -0.890 0.0891
## 4 total.r 224 0.383 0.391 0.130 0.128 -0.123 0.725 -0.383 0.811
## 5 total.r_if_rm 224 0.374 0.383 0.129 0.126 -0.130 0.718 -0.366 0.810
## 6 alpha_if_rm 224 0.974 0.974 0.0000919 0.0000923 0.974 0.975 -0.165 0.529
## 7 g_loading 224 0.621 0.654 0.190 0.190 -0.227 0.975 -0.845 1.19
## 8 discrim 224 1.59 1.47 0.877 0.761 -0.396 7.42 1.74 7.88
## 9 difficulty 224 -1.70 -1.68 2.23 2.11 -14.2 2.74 -1.04 3.68
# descriptive stats for wave 1 test
all_items_stats$itemstats %>%
filter(N == 441) %>%
describe2()
## # A tibble: 9 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N 151 441 441 0 0 441 441 NaN NaN
## 2 mean 151 0.806 0.848 0.141 0.124 0.333 0.993 -1.14 0.933
## 3 sd 151 0.356 0.359 0.102 0.113 0.0823 0.501 -0.502 -0.502
## 4 total.r 151 0.391 0.396 0.122 0.124 0.00126 0.669 -0.228 0.0736
## 5 total.r_if_rm 151 0.383 0.384 0.121 0.124 -0.00293 0.661 -0.209 0.0696
## 6 alpha_if_rm 151 0.974 0.974 0.0000823 0.0000877 0.974 0.974 -0.443 -0.176
## 7 g_loading 151 0.681 0.699 0.157 0.142 0.0918 0.975 -0.899 0.823
## 8 discrim 151 1.82 1.66 0.884 0.675 0.157 7.42 1.99 9.42
## 9 difficulty 151 -2.62 -2.50 1.85 1.33 -14.2 0.783 -1.95 9.19
# descriptive stats for wave 2 test
all_items_stats$itemstats %>%
filter(N == 383) %>%
describe2()
## # A tibble: 9 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N 73 383 383 0 0 383 383 NaN NaN
## 2 mean 73 0.436 0.428 0.206 0.205 0.0783 0.997 0.651 0.114
## 3 sd 73 0.445 0.469 0.0826 0.0411 0.0511 0.501 -2.74 8.87
## 4 total.r 73 0.367 0.379 0.145 0.121 -0.123 0.725 -0.470 1.14
## 5 total.r_if_rm 73 0.356 0.368 0.145 0.126 -0.130 0.718 -0.437 1.09
## 6 alpha_if_rm 73 0.974 0.974 0.000108 0.000103 0.974 0.975 -0.0804 0.466
## 7 g_loading 73 0.497 0.511 0.193 0.174 -0.227 0.880 -0.628 1.80
## 8 discrim 73 1.09 1.01 0.628 0.449 -0.396 3.16 1.10 1.90
## 9 difficulty 73 0.204 0.354 1.69 1.09 -8.24 2.74 -2.43 8.74
#difficulties (plotted as pass rates)
all_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/all items pass rate.png") # create folder named "figs" and put png in there
#loadings
all_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/all items factor loading.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#reliability
plot(all_items_fit, type = "rxx")
rxx_info = plot(all_items_fit, type = "rxx")
all_items_fit_scores = fscores(all_items_fit, full.scores.SE = T)
empirical_rxx(all_items_fit_scores)
## F1
## 0.973
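#empirical_rxx() is, assuming the standard definition, var(theta_hat) / (var(theta_hat) + mean(SE^2));
#a minimal manual sketch that should reproduce the value above:
theta_hat = all_items_fit_scores[, "F1"]
theta_se = all_items_fit_scores[, "SE_F1"]
var(theta_hat) / (var(theta_hat) + mean(theta_se^2))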
#which ability range has reliability > .90?
#extract the reliability curve from the lattice plot object
d_rxx_info = tibble(
z = rxx_info$panel.args[[1]]$x,
rel = rxx_info$panel.args[[1]]$y
)
d_rxx_info %>%
filter(rel > .90) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 104 -0.784 -0.784 1.82 2.32 -3.89 2.32 1.20e-16 -1.23
## 2 rel 104 0.966 0.976 0.0260 0.0222 0.901 0.992 -8.98e- 1 -0.400
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 133 -0.573 -0.573 2.32 2.95 -4.55 3.41 1.95e-16 -1.23
## 2 rel 133 0.942 0.963 0.0528 0.0402 0.806 0.992 -1.01e+ 0 -0.169
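#the same coverage question can be answered directly with the reliability_range() helper defined at
#the top of this script; the thresholds here are illustrative
d_rxx_info %>%
reliability_range(min_reliability = c(.80, .90, .95))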
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))
GG_save("figs/reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 66 -2.42e-16 -4.44e-16 1.16 1.48 -1.96 1.96 -6.46e-17 -1.25
## 2 rel 66 9.73e- 1 9.82e- 1 0.0208 0.0150 0.922 0.992 -9.38e- 1 -0.391
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 100 -2.45e-16 -4.44e-16 1.75 2.24 -2.98 2.98 2.33e-16 -1.24
## 2 rel 100 9.60e- 1 9.77e- 1 0.0384 0.0208 0.850 0.992 -1.29e+ 0 0.556
#difficulty and g-loading
all_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = all_items_scored$id,
g = all_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#age-adjust g (spline residuals), then restandardize against the white subset
d2$g_ageadj = resid(ols(g ~ rcs(age), data = d2)) %>% unname()
## number of knots in rcs defaulting to 5
d2$g_ageadj_z = standardize(d2$g, focal_group = d2$white_only)
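#standardize() with a focal_group is assumed here to rescale all scores by the focal group's mean
#and SD; a manual sketch of that interpretation:
white_g = d2$g[d2$white_only]
g_white_z_manual = (d2$g - mean(white_g, na.rm = T)) / sd(white_g, na.rm = T)
head(g_white_z_manual)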
all_items_stats$itemstats %>% filter(g_loading < .25)
## N mean sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## frequent 441 0.977 0.149 0.001 -0.003 0.974 0.092 0.157 -3.775
## 2of5_24 383 0.238 0.426 0.190 0.178 0.974 0.212 0.368 1.220
## 3of5_9 383 0.264 0.441 0.212 0.199 0.974 0.248 0.435 1.093
## 3of5_15 383 0.285 0.452 -0.037 -0.050 0.975 -0.045 -0.076 0.919
## 3of5_19 383 0.078 0.269 -0.123 -0.130 0.975 -0.227 -0.396 2.506
#drop the poorly loading items (g loading < .25) before refitting
good_items_scored = all_items_scored %>% select(-!!(all_items_stats$itemstats %>% filter(g_loading < .25) %>% rownames()))
#refit
good_items_fit = cache_object(filename = "data/good_items_fit.rds", expr = mirt(
good_items_scored %>% select(-id),
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 2000)
),
renew = renew_all)
## Cache found, reading object from disk
good_items_fit
##
## Call:
## mirt(data = good_items_scored %>% select(-id), model = 1, itemtype = "2PL",
## technical = list(NCYCLES = 2000))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 328 EM iterations.
## mirt version: 1.42
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -36583
## Estimated parameters: 438
## AIC = 74042
## BIC = 75833; SABIC = 74443
good_items_fit %>% summary()
## F1 h2
## a_specific_number 0.456 0.2083
## a_type_of_brass_instrument 0.877 0.7699
## a_type_of_drapery 0.652 0.4257
## a_type_of_fabric_101 0.744 0.5532
## a_type_of_fabric_98 0.700 0.4895
## a_type_of_grasshopper 0.803 0.6452
## a_type_of_hat 0.574 0.3293
## a_type_of_heating_unit 0.498 0.2478
## a_type_of_magistrate_position 0.746 0.5566
## a_type_of_mathematical_operation 0.569 0.3240
## a_type_of_overshoe 0.495 0.2455
## a_type_of_plant 0.734 0.5392
## a_type_of_religious_teacher 0.779 0.6073
## a_type_of_rock 0.321 0.1028
## a_type_of_skirts 0.642 0.4126
## a_type_of_smokeless_powder 0.467 0.2178
## a_type_of_sound 0.490 0.2403
## a_type_of_sword 0.356 0.1265
## a_type_of_volcanic_crater 0.666 0.4441
## a_type_of_wavy_form 0.706 0.4986
## actionable_negligence 0.769 0.5916
## advisory 0.723 0.5231
## advocate 0.702 0.4921
## almighty 0.914 0.8357
## amazement 0.661 0.4366
## amenability 0.740 0.5471
## auspices 0.688 0.4727
## barely_able_to_read_and_write 0.841 0.7067
## because 0.680 0.4628
## berate 0.836 0.6993
## blade 0.783 0.6130
## blend 0.683 0.4659
## blunder 0.860 0.7401
## bow 0.751 0.5647
## brief 0.432 0.1866
## bring_about 0.790 0.6240
## carelessly_or_hastily_put_together 0.609 0.3711
## celebration 0.719 0.5167
## cheat 0.806 0.6497
## clay_pigeon_shooting 0.767 0.5885
## coarse 0.776 0.6028
## collect_or_study_insects 0.626 0.3922
## colossal 0.713 0.5078
## commotion 0.903 0.8150
## complainer 0.795 0.6323
## confiscate 0.909 0.8268
## congratulate 0.591 0.3491
## contemplate 0.468 0.2186
## convoy 0.855 0.7303
## couch 0.651 0.4239
## crease 0.762 0.5811
## cunning 0.639 0.4085
## deceit 0.575 0.3301
## deliberately 0.761 0.5790
## deprive 0.636 0.4043
## detectable 0.338 0.1142
## diatribes 0.579 0.3350
## disjoined 0.311 0.0970
## disrupt 0.791 0.6253
## distinct 0.275 0.0756
## divergence 0.796 0.6334
## dome 0.552 0.3046
## downfall 0.775 0.6010
## drink 0.851 0.7244
## elite 0.700 0.4896
## embarrassment 0.602 0.3624
## emphasize 0.881 0.7753
## empire 0.505 0.2554
## empty 0.759 0.5763
## entanglement 0.691 0.4775
## environment 0.659 0.4343
## excite 0.775 0.6010
## flammable 0.920 0.8466
## flatterer 0.868 0.7527
## flatteries 0.646 0.4178
## forever 0.799 0.6379
## gigantic 0.763 0.5826
## girl 0.779 0.6066
## goo 0.717 0.5147
## goodbye 0.820 0.6732
## greed 0.841 0.7073
## groan 0.851 0.7244
## gruesome 0.666 0.4434
## guarantee 0.283 0.0801
## gutter 0.658 0.4326
## harmfulness 0.523 0.2739
## hobby 0.806 0.6502
## hut 0.811 0.6582
## illness 0.491 0.2413
## impromptu 0.660 0.4359
## indescribable 0.762 0.5802
## intellectual 0.711 0.5054
## jargon 0.926 0.8573
## knowledgeable 0.817 0.6681
## lackadaisical 0.606 0.3676
## manager 0.868 0.7529
## meal 0.792 0.6277
## melodic 0.729 0.5313
## mutually 0.873 0.7630
## nonsense 0.861 0.7410
## nonsensical 0.872 0.7600
## not_coveted 0.664 0.4407
## pamper 0.590 0.3487
## penitentiary 0.528 0.2784
## perplexing 0.691 0.4768
## persistence 0.638 0.4075
## predetermine 0.542 0.2940
## pretender 0.857 0.7348
## questioning 0.662 0.4380
## quickly 0.657 0.4321
## rebellious 0.803 0.6452
## referee 0.772 0.5961
## referendum 0.525 0.2760
## relating_to_the_right 0.350 0.1227
## relevant 0.699 0.4890
## respectful 0.771 0.5937
## retailer 0.601 0.3612
## retract 0.524 0.2751
## ropes 0.799 0.6381
## sag 0.596 0.3548
## schemer 0.669 0.4478
## seize 0.793 0.6295
## sensitivity 0.717 0.5145
## shadows 0.658 0.4333
## sketch 0.655 0.4284
## slang 0.803 0.6442
## slave 0.481 0.2313
## sluggish 0.634 0.4020
## somber 0.606 0.3669
## spinelessness 0.632 0.4000
## sporadic 0.832 0.6916
## squad 0.975 0.9509
## stagger 0.652 0.4256
## stinking 0.877 0.7686
## stroll 0.872 0.7606
## stubborn_100 0.552 0.3050
## stubborn_143 0.654 0.4282
## stylish 0.693 0.4796
## summit 0.756 0.5713
## terminology 0.830 0.6882
## the_science_of_speech_sounds 0.751 0.5646
## transportation 0.298 0.0890
## tyrant 0.439 0.1932
## unhealthful 0.419 0.1756
## vile 0.881 0.7770
## vulgar 0.720 0.5181
## wandering 0.415 0.1724
## warning 0.637 0.4063
## wave 0.721 0.5199
## weaponry 0.676 0.4571
## 1of5_1 0.440 0.1938
## 1of5_2 0.289 0.0838
## 1of5_3 0.378 0.1431
## 1of5_4 0.311 0.0969
## 2of5_1 0.411 0.1685
## 2of5_2 0.536 0.2872
## 2of5_3 0.581 0.3376
## 2of5_4 0.619 0.3830
## 2of5_5 0.521 0.2714
## 2of5_6 0.736 0.5414
## 2of5_7 0.375 0.1406
## 2of5_8 0.585 0.3427
## 2of5_9 0.554 0.3074
## 2of5_10 0.378 0.1428
## 2of5_11 0.371 0.1373
## 2of5_12 0.418 0.1749
## 2of5_13 0.491 0.2410
## 2of5_14 0.402 0.1612
## 2of5_15 0.677 0.4578
## 2of5_16 0.574 0.3299
## 2of5_17 0.679 0.4617
## 2of5_18 0.735 0.5400
## 2of5_19 0.447 0.1997
## 2of5_20 0.384 0.1477
## 2of5_21 0.514 0.2646
## 2of5_22 0.881 0.7754
## 2of5_23 0.310 0.0964
## 2of5_25 0.734 0.5394
## 2of5_26 0.711 0.5058
## 2of5_27 0.582 0.3385
## 2of5_28 0.275 0.0755
## 2of5_29 0.548 0.3007
## 2of5_30 0.552 0.3047
## 2of5_31 0.538 0.2900
## 2of5_32 0.627 0.3926
## 2of5_33 0.333 0.1111
## 2of5_34 0.453 0.2055
## 2of5_35 0.295 0.0871
## 2of5_36 0.764 0.5831
## 2of5_37 0.856 0.7335
## 3of5_1 0.271 0.0732
## 3of5_2 0.494 0.2441
## 3of5_3 0.476 0.2263
## 3of5_4 0.707 0.4992
## 3of5_5 0.329 0.1082
## 3of5_6 0.750 0.5630
## 3of5_7 0.594 0.3526
## 3of5_8 0.411 0.1690
## 3of5_10 0.850 0.7228
## 3of5_11 0.723 0.5220
## 3of5_12 0.546 0.2980
## 3of5_13 0.365 0.1330
## 3of5_14 0.508 0.2577
## 3of5_16 0.326 0.1065
## 3of5_17 0.386 0.1486
## 3of5_18 0.319 0.1018
## 3of5_20 0.553 0.3056
## 3of5_21 0.422 0.1781
## 3of5_22 0.443 0.1960
## 3of5_23 0.860 0.7398
## 3of5_24 0.549 0.3018
## 3of5_25 0.526 0.2766
## 3of5_26 0.709 0.5033
## 3of5_27 0.596 0.3551
## 3of5_28 0.413 0.1709
## 3of5_29 0.541 0.2926
## 3of5_30 0.510 0.2606
## 3of5_31 0.426 0.1812
## 3of5_32 0.576 0.3316
##
## SS loadings: 94.3
## Proportion Var: 0.431
##
## Factor correlations:
##
## F1
## F1 1
good_items_stats = itemstats(good_items_scored %>% select(-id))
#attach the g loadings from the refit to the classical item stats
good_items_stats$itemstats$g_loading = good_items_fit %>% summary() %>% .$rotF %>% as.vector()
#extract the 2PL slopes and negated intercepts from the refit
good_items_stats$itemstats$discrim = coef(good_items_fit, simplify = T)$items[, 1] %>% unname()
good_items_stats$itemstats$difficulty = -coef(good_items_fit, simplify = T)$items[, 2] %>% unname()
good_items_stats$itemstats
## N mean sd total.r total.r_if_rm alpha_if_rm g_loading discrim difficulty
## a_specific_number 441 0.841 0.366 0.257 0.247 0.975 0.456 0.873 -1.901
## a_type_of_brass_instrument 441 0.932 0.252 0.387 0.381 0.975 0.877 3.113 -4.945
## a_type_of_drapery 441 0.823 0.382 0.389 0.380 0.975 0.652 1.465 -2.098
## a_type_of_fabric_101 441 0.934 0.248 0.320 0.314 0.975 0.744 1.894 -3.742
## a_type_of_fabric_98 441 0.755 0.431 0.479 0.470 0.975 0.700 1.667 -1.707
## a_type_of_grasshopper 441 0.821 0.384 0.510 0.502 0.975 0.803 2.295 -2.683
## a_type_of_hat 441 0.356 0.479 0.468 0.456 0.975 0.574 1.192 0.723
## a_type_of_heating_unit 441 0.871 0.336 0.248 0.239 0.975 0.498 0.977 -2.215
## a_type_of_magistrate_position 441 0.898 0.303 0.357 0.350 0.975 0.746 1.907 -3.199
## a_type_of_mathematical_operation 441 0.921 0.271 0.263 0.255 0.975 0.569 1.178 -2.927
## a_type_of_overshoe 441 0.635 0.482 0.396 0.384 0.975 0.495 0.971 -0.690
## a_type_of_plant 441 0.862 0.346 0.422 0.414 0.975 0.734 1.841 -2.726
## a_type_of_religious_teacher 441 0.855 0.353 0.462 0.454 0.975 0.779 2.117 -2.878
## a_type_of_rock 441 0.948 0.223 0.100 0.094 0.975 0.321 0.576 -3.036
## a_type_of_skirts 441 0.828 0.378 0.386 0.377 0.975 0.642 1.426 -2.111
## a_type_of_smokeless_powder 441 0.753 0.432 0.301 0.290 0.975 0.467 0.898 -1.306
## a_type_of_sound 441 0.680 0.467 0.364 0.353 0.975 0.490 0.957 -0.921
## a_type_of_sword 441 0.599 0.491 0.258 0.245 0.975 0.356 0.648 -0.449
## a_type_of_volcanic_crater 441 0.855 0.353 0.380 0.371 0.975 0.666 1.521 -2.417
## a_type_of_wavy_form 441 0.615 0.487 0.549 0.539 0.975 0.706 1.697 -0.806
## actionable_negligence 441 0.841 0.366 0.449 0.440 0.975 0.769 2.048 -2.685
## advisory 441 0.959 0.198 0.242 0.237 0.975 0.723 1.783 -4.193
## advocate 441 0.751 0.433 0.502 0.492 0.975 0.702 1.675 -1.680
## almighty 441 0.961 0.193 0.354 0.349 0.975 0.914 3.839 -6.568
## amazement 441 0.918 0.274 0.305 0.298 0.975 0.661 1.498 -3.137
## amenability 441 0.832 0.374 0.422 0.413 0.975 0.740 1.871 -2.462
## auspices 441 0.494 0.501 0.558 0.548 0.975 0.688 1.611 -0.062
## barely_able_to_read_and_write 441 0.880 0.326 0.473 0.466 0.975 0.841 2.642 -3.630
## because 441 0.912 0.284 0.284 0.277 0.975 0.680 1.580 -3.105
## berate 441 0.857 0.350 0.451 0.443 0.975 0.836 2.596 -3.314
## blade 441 0.939 0.240 0.342 0.336 0.975 0.783 2.142 -4.068
## blend 441 0.912 0.284 0.329 0.321 0.975 0.683 1.590 -3.114
## blunder 441 0.961 0.193 0.230 0.225 0.975 0.860 2.872 -5.416
## bow 441 0.596 0.491 0.595 0.586 0.975 0.751 1.938 -0.774
## brief 441 0.571 0.495 0.429 0.417 0.975 0.432 0.815 -0.350
## bring_about 441 0.868 0.338 0.438 0.430 0.975 0.790 2.192 -3.090
## carelessly_or_hastily_put_together 441 0.866 0.341 0.312 0.304 0.975 0.609 1.308 -2.377
## celebration 441 0.757 0.429 0.477 0.467 0.975 0.719 1.760 -1.779
## cheat 441 0.723 0.448 0.577 0.569 0.975 0.806 2.318 -1.864
## clay_pigeon_shooting 441 0.943 0.232 0.292 0.286 0.975 0.767 2.035 -4.057
## coarse 441 0.941 0.236 0.340 0.333 0.975 0.776 2.097 -4.069
## collect_or_study_insects 441 0.932 0.252 0.272 0.265 0.975 0.626 1.367 -3.247
## colossal 441 0.823 0.382 0.419 0.410 0.975 0.713 1.729 -2.278
## commotion 441 0.853 0.355 0.563 0.556 0.975 0.903 3.572 -4.156
## complainer 441 0.839 0.368 0.483 0.475 0.975 0.795 2.232 -2.810
## confiscate 441 0.896 0.306 0.483 0.476 0.975 0.909 3.718 -4.932
## congratulate 441 0.887 0.317 0.294 0.285 0.975 0.591 1.246 -2.546
## contemplate 441 0.902 0.297 0.242 0.234 0.975 0.468 0.900 -2.509
## convoy 441 0.764 0.425 0.594 0.586 0.975 0.855 2.801 -2.538
## couch 441 0.757 0.429 0.434 0.424 0.975 0.651 1.460 -1.605
## crease 441 0.830 0.376 0.470 0.461 0.975 0.762 2.005 -2.542
## cunning 441 0.796 0.403 0.369 0.359 0.975 0.639 1.414 -1.854
## deceit 441 0.746 0.436 0.391 0.380 0.975 0.575 1.195 -1.395
## deliberately 441 0.812 0.391 0.479 0.471 0.975 0.761 1.996 -2.372
## deprive 441 0.837 0.370 0.386 0.377 0.975 0.636 1.402 -2.171
## detectable 441 0.966 0.181 0.103 0.097 0.975 0.338 0.611 -3.504
## diatribes 441 0.624 0.485 0.453 0.441 0.975 0.579 1.208 -0.699
## disjoined 441 0.494 0.501 0.272 0.259 0.975 0.311 0.558 0.016
## disrupt 441 0.959 0.198 0.274 0.269 0.975 0.791 2.198 -4.609
## distinct 441 0.857 0.350 0.186 0.176 0.975 0.275 0.487 -1.873
## divergence 441 0.698 0.459 0.591 0.582 0.975 0.796 2.237 -1.624
## dome 441 0.744 0.437 0.393 0.383 0.975 0.552 1.126 -1.349
## downfall 441 0.898 0.303 0.407 0.400 0.975 0.775 2.089 -3.359
## drink 441 0.769 0.422 0.558 0.550 0.975 0.851 2.759 -2.550
## elite 441 0.853 0.355 0.384 0.376 0.975 0.700 1.667 -2.501
## embarrassment 441 0.875 0.331 0.313 0.304 0.975 0.602 1.283 -2.451
## emphasize 441 0.966 0.181 0.286 0.282 0.975 0.881 3.161 -5.909
## empire 441 0.816 0.388 0.317 0.307 0.975 0.505 0.997 -1.771
## empty 441 0.848 0.359 0.448 0.439 0.975 0.759 1.985 -2.702
## entanglement 441 0.542 0.499 0.539 0.528 0.975 0.691 1.627 -0.349
## environment 441 0.503 0.501 0.541 0.531 0.975 0.659 1.491 -0.101
## excite 441 0.707 0.455 0.581 0.572 0.975 0.775 2.089 -1.608
## flammable 441 0.986 0.116 0.223 0.220 0.975 0.920 3.999 -8.102
## flatterer 441 0.844 0.364 0.558 0.551 0.975 0.868 2.969 -3.489
## flatteries 441 0.732 0.443 0.482 0.472 0.975 0.646 1.442 -1.430
## forever 441 0.864 0.343 0.453 0.445 0.975 0.799 2.259 -3.096
## gigantic 441 0.952 0.213 0.242 0.237 0.975 0.763 2.011 -4.239
## girl 441 0.875 0.331 0.418 0.410 0.975 0.779 2.113 -3.100
## goo 441 0.971 0.169 0.219 0.215 0.975 0.717 1.753 -4.524
## goodbye 441 0.948 0.223 0.326 0.320 0.975 0.820 2.443 -4.572
## greed 441 0.649 0.478 0.668 0.660 0.975 0.841 2.646 -1.450
## groan 441 0.912 0.284 0.391 0.384 0.975 0.851 2.760 -4.202
## gruesome 441 0.834 0.372 0.392 0.383 0.975 0.666 1.519 -2.230
## guarantee 441 0.930 0.256 0.108 0.100 0.975 0.283 0.502 -2.683
## gutter 441 0.941 0.236 0.223 0.217 0.975 0.658 1.486 -3.509
## harmfulness 441 0.868 0.338 0.312 0.303 0.975 0.523 1.045 -2.233
## hobby 441 0.880 0.326 0.443 0.436 0.975 0.806 2.321 -3.336
## hut 441 0.889 0.315 0.454 0.446 0.975 0.811 2.362 -3.488
## illness 441 0.717 0.451 0.334 0.323 0.975 0.491 0.960 -1.120
## impromptu 441 0.626 0.484 0.513 0.503 0.975 0.660 1.496 -0.804
## indescribable 441 0.955 0.208 0.291 0.286 0.975 0.762 2.001 -4.286
## intellectual 441 0.943 0.232 0.296 0.290 0.975 0.711 1.720 -3.759
## jargon 441 0.959 0.198 0.346 0.341 0.975 0.926 4.172 -6.900
## knowledgeable 441 0.823 0.382 0.513 0.505 0.975 0.817 2.415 -2.800
## lackadaisical 441 0.741 0.438 0.416 0.405 0.975 0.606 1.298 -1.416
## manager 441 0.986 0.116 0.192 0.189 0.975 0.868 2.971 -6.748
## meal 441 0.610 0.488 0.627 0.619 0.975 0.792 2.210 -0.965
## melodic 441 0.873 0.333 0.422 0.414 0.975 0.729 1.812 -2.823
## mutually 441 0.966 0.181 0.310 0.305 0.975 0.873 3.054 -5.781
## nonsense 441 0.873 0.333 0.493 0.486 0.975 0.861 2.879 -3.766
## nonsensical 441 0.973 0.163 0.306 0.301 0.975 0.872 3.029 -6.021
## not_coveted 441 0.853 0.355 0.371 0.362 0.975 0.664 1.511 -2.388
## pamper 441 0.467 0.499 0.452 0.441 0.975 0.590 1.245 0.118
## penitentiary 441 0.918 0.274 0.241 0.234 0.975 0.528 1.057 -2.811
## perplexing 441 0.905 0.294 0.376 0.369 0.975 0.691 1.625 -3.050
## persistence 441 0.862 0.346 0.350 0.341 0.975 0.638 1.412 -2.405
## predetermine 441 0.916 0.278 0.256 0.249 0.975 0.542 1.098 -2.807
## pretender 441 0.991 0.095 0.139 0.136 0.975 0.857 2.833 -7.019
## questioning 441 0.900 0.300 0.343 0.335 0.975 0.662 1.502 -2.894
## quickly 441 0.789 0.408 0.400 0.391 0.975 0.657 1.485 -1.845
## rebellious 441 0.832 0.374 0.488 0.480 0.975 0.803 2.295 -2.793
## [ reached 'max' / getOption("max.print") -- omitted 108 rows ]
good_items_stats$itemstats %>%
describe2()
## # A tibble: 9 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N 219 423. 441 27.0 0 383 441 -0.791 -1.38
## 2 mean 219 0.692 0.757 0.232 0.229 0.0992 0.997 -0.714 -0.618
## 3 sd 219 0.386 0.405 0.104 0.110 0.0511 0.501 -0.892 0.124
## 4 total.r 219 0.391 0.396 0.119 0.124 0.0999 0.725 0.0355 -0.280
## 5 total.r_if_rm 219 0.382 0.384 0.119 0.123 0.0942 0.718 0.0607 -0.294
## 6 alpha_if_rm 219 0.975 0.975 0.0000854 0.0000882 0.975 0.975 -0.498 -0.0909
## 7 g_loading 219 0.634 0.658 0.170 0.180 0.271 0.975 -0.346 -0.798
## 8 discrim 219 1.62 1.49 0.861 0.763 0.478 7.49 1.95 8.85
## 9 difficulty 219 -1.75 -1.71 2.22 2.06 -14.3 2.74 -1.10 3.99
#difficulties (plotted as pass rates)
good_items_stats$itemstats$mean %>% GG_denhist() +
scale_x_continuous("Pass rate")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items pass rate.png")
#loadings
good_items_stats$itemstats$g_loading %>% GG_denhist() +
scale_x_continuous("Factor loading")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items factor loading.png")
#reliability
good_items_fit_scores = fscores(good_items_fit, full.scores.SE = T)
empirical_rxx(good_items_fit_scores)
## F1
## 0.972
#which ability range has reliability > .90?
d_rxx_info = rxx_info = get_reliabilities(good_items_fit)
d_rxx_info %>%
filter(rel > .90) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 104 -0.784 -0.784 1.82 2.32 -3.89 2.32 1.20e-16 -1.23
## 2 rel 104 0.966 0.976 0.0263 0.0224 0.900 0.992 -9.02e- 1 -0.391
d_rxx_info %>%
filter(rel > .80) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 133 -0.573 -0.573 2.32 2.95 -4.55 3.41 1.95e-16 -1.23
## 2 rel 133 0.942 0.963 0.0536 0.0407 0.803 0.992 -1.01e+ 0 -0.152
d_rxx_info %>%
ggplot(aes(z, rel)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4))
GG_save("figs/good items reliability as function of ability.png")
d_rxx_info %>%
filter(z >= -2, z <= 2) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 66 -2.42e-16 -4.44e-16 1.16 1.48 -1.96 1.96 -6.46e-17 -1.25
## 2 rel 66 9.73e- 1 9.82e- 1 0.0210 0.0151 0.921 0.992 -9.41e- 1 -0.384
d_rxx_info %>%
filter(z >= -3, z <= 3) %>%
describe2()
## # A tibble: 2 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 z 100 -2.45e-16 -4.44e-16 1.75 2.24 -2.98 2.98 2.33e-16 -1.24
## 2 rel 100 9.60e- 1 9.77e- 1 0.0389 0.0209 0.848 0.992 -1.30e+ 0 0.574
#difficulty and g-loading
good_items_stats$itemstats %>%
rownames_to_column() %>%
GG_scatter("mean", "g_loading", case_names = "rowname") +
xlab("Pass rate") +
ylab("Factor loading")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/good items scatter pass rate loading.png")
## `geom_smooth()` using formula = 'y ~ x'
#merge g score back to main dataset
d2 = left_join(
d,
tibble(id = good_items_scored$id,
g = good_items_fit_scores[, 1]),
by = c("Participant_id" = "id")
)
assert_that(!anyDuplicated(d2$Participant_id))
## [1] TRUE
#Norming, the age problem
GG_scatter(d2, "age", "g") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#scores by age group
d2$age_group = discretize(d2$age, 3)
GG_group_means(d2, "g", "age_group", type = "boxplot") +
scale_y_continuous("Vocabulary IRT score") +
scale_x_discrete("Age group")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
GG_save("figs/good items boxplots by age.png")
describe2(d2$g, d2$age_group)
## New names:
## • `` -> `...1`
## # A tibble: 3 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 [18.9,40.3] ...1 176 -0.390 -0.481 0.832 0.841 -2.27 1.81 0.592 -0.194
## 2 (40.3,61.7] ...1 174 0.0964 -0.0615 0.957 1.09 -1.76 2.55 0.460 -0.685
## 3 (61.7,83.1] ...1 91 0.562 0.529 1.02 1.03 -1.50 3.15 0.184 -0.191
#do whites differ in age?
describe2(d2$age, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE ...1 114 41.3 39 14.8 18.5 21 74 0.312 -1.09
## 2 TRUE ...1 327 48.5 51 15.7 17.8 19 83 -0.121 -1.07
#white subset is easier to work with
d2_white = d2 %>% filter(white_only)
(white_ageadj_model = lm(g ~ age, data = d2_white))
##
## Call:
## lm(formula = g ~ age, data = d2_white)
##
## Coefficients:
## (Intercept) age
## -1.1010 0.0251
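#to put the slope in context: ~0.025 z per year implies roughly 1.5 z from age 20 to age 80
predict(white_ageadj_model, newdata = tibble(age = c(20, 50, 80)))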
#get resids, step 1
d2_white$g_ageadj1 = resid(white_ageadj_model)
(ageadj_desc_whites = describe2(d2_white$g_ageadj1))
## # A tibble: 1 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 x 327 -2.57e-17 -0.0648 0.901 0.958 -2.26 2.60 0.320 -0.281
#an alternative is to model the absolute residuals directly
(absSD_ols_whites = lm(abs(g_ageadj1) ~ age, data = d2_white))
##
## Call:
## lm(formula = abs(g_ageadj1) ~ age, data = d2_white)
##
## Coefficients:
## (Intercept) age
## 0.46724 0.00544
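#the positive slope means the residual spread grows with age: ~0.58 at age 20 vs ~0.90 at age 80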
#get scores adjusted for the age trend in both the mean and the SD
d2_white$g_ageadj2 = d2_white$g_ageadj1 / predict(absSD_ols_whites)
#does this actually work? check for a remaining age trend and heteroscedasticity
d2_white %>%
GG_scatter("age", "g_ageadj2")
## `geom_smooth()` using formula = 'y ~ x'
test_HS(d2_white$g_ageadj2, d2_white$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
## test r2adj p fit log10_p
## <chr> <dbl> <dbl> <named list> <dbl>
## 1 linear raw -0.00300 0.872 <ols> 0.0594
## 2 spline raw -0.00246 0.361 <ols> 0.443
## 3 linear rank -0.00305 0.924 <ols> 0.0343
## 4 spline rank 0.00330 0.164 <ols> 0.785
#restore to white z score norms
(white_desc_ageadj2_desc = describe2(d2_white$g_ageadj2))
## # A tibble: 1 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 x 327 0.000443 -0.0814 1.22 1.33 -2.59 3.73 0.397 -0.350
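#aside: for a normal variable E|X| = sigma * sqrt(2/pi), about 0.80 * sigma, so dividing by the
#predicted mean absolute residual leaves an SD of roughly 1.25 (cf. the 1.22 above), hence the
#re-standardization below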
d2_white$g_ageadj3 = (d2_white$g_ageadj2 - white_desc_ageadj2_desc$mean) / white_desc_ageadj2_desc$sd
d2_white$g_ageadj3 %>% describe2()
## # A tibble: 1 × 10
## var n mean median sd mad min max skew kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 x 327 1.50e-17 -0.0668 1 1.08 -2.12 3.05 0.397 -0.350
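#a minimal manual sketch of the final IQ conversion on the conventional metric (mean 100, SD 15),
#applied to the white-normed, age-adjusted z scores; make_norms() below in effect does the age
#modeling and this rescaling in one step
d2_white$IQ_manual = 100 + 15 * d2_white$g_ageadj3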
#Finally, we can make IQs with a single helper function
vocab_norms = kirkegaard::make_norms(
score = d2$g,
age = d2$age,
norm_group = d2$race_combos_common == "white"
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
d2$IQ = vocab_norms$data$IQ
#plot results to see if they make sense
d2 %>%
GG_denhist("IQ", "white_only")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/good items IQ scores by Whiteness.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
describe2(d2$IQ, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE ...1 114 95.2 92.6 16.1 15.8 62.0 149. 0.717 0.166
## 2 TRUE ...1 327 100 99.0 15 16.3 68.2 146. 0.397 -0.350
#there should be no age relationship left (the norms were built to remove it)
d2 %>%
GG_scatter("age", "IQ")
## `geom_smooth()` using formula = 'y ~ x'
#and no heteroscedasticity
test_HS(d2$IQ, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
## test r2adj p fit log10_p
## <chr> <dbl> <dbl> <named list> <dbl>
## 1 linear raw -0.00226 0.937 <ols> 0.0282
## 2 spline raw -0.00808 0.925 <ols> 0.0339
## 3 linear rank -0.00211 0.786 <ols> 0.105
## 4 spline rank -0.00674 0.803 <ols> 0.0954
#recode the MMPI items from Yes/No to 1/0 numerics
d2_mmpi = d2 %>% select(I_am_easily_awakened_by_noise:I_like_movie_love_scenes) %>%
map_df(~mapvalues(., from = c("Yes", "No"), to = c(1, 0))) %>%
map_df(as.numeric)
#IQ means by MMPI
MMPI_IQ_means = map2_df(d2_mmpi, names(d2_mmpi), function(x, y) {
desc = suppressMessages(describe2(d2$IQ, group = x))
tibble(
question = d_vars %>% filter(var_name == y) %>% pull(label),
yes = desc$mean[2],
no = desc$mean[1],
IQ_gap = yes-no,
abs_IQ_gap = abs(IQ_gap)
)
})
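#a quick look at the MMPI items with the largest IQ gaps (illustrative; output not shown)
MMPI_IQ_means %>%
arrange(desc(abs_IQ_gap)) %>%
head(10)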
GG_scatter(d2, "Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct", "IQ") +
scale_x_continuous("Answer to 'How many items on the test you just took do you think you got correct?'")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/self-estimate vs. IQ.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d2, "Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did", "IQ")
## `geom_smooth()` using formula = 'y ~ x'
paired.r(
cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct, use = "pair"),
cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did, use = "pair"),
n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1, 1]
)
## Call: paired.r(xy = cor(d2$IQ, d2$Of_the_155_vocabulary_questions_you_just_answered_how_many_do_you_think_you_got_correct,
## use = "pair"), xz = cor(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did,
## use = "pair"), n = pairwiseCount(d2$IQ, d2$Compared_to_the_other_Prolific_survey_users_who_took_this_survey_how_well_do_you_think_you_did)[1,
## 1])
## [1] "test of difference between two independent correlations"
## z = 3.12 With probability = 0
describeBy(d2$IQ, d2$I_was_a_slow_learner_in_school)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 385 99.6 15.5 98.9 98.9 17 62 149 87.2 0.39 -0.28 0.79
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 56 92.8 13.5 89.7 91.4 11.2 70.3 132 61.9 0.96 0.44 1.8
describeBy(d2$IQ, d2$I_like_to_read_about_science)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 98 92.5 13.9 90.6 91.1 12.7 67 133 66.2 0.84 0.34 1.4
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 343 101 15.4 99.6 99.9 16.8 62 149 87.2 0.36 -0.28 0.83
describeBy(d2$IQ, d2$A_person_shouldn_t_be_punished_for_breaking_a_law_that_he_thinks_is_unreasonable)
##
## Descriptive statistics by group
## group: No
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 392 99.4 15.6 98.5 98.5 17.6 62 149 87.2 0.45 -0.35 0.79
## ------------------------------------------------------------------------------------------------
## group: Yes
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 49 93.2 12.2 94.6 93.5 14.5 68.2 116 47.7 -0.17 -0.9 1.74
GG_scatter(d2, "Total_approvals", "g") +
geom_smooth()
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Sex bias
sex_bias_fit = cache_object(filename = "data/sex_bias_fit.rds", expr = DIF_test(
items = good_items_scored %>% select(-id),
model = 1,
group = d2$sex,
technical = list(NCYCLES = 2000)
))
## Cache found, reading object from disk
#DIF items
sex_bias_fit$DIF_stats %>% filter(p < .05)
## item groups converged AIC SABIC HQ BIC X2 df p number p_adj
## 1 a_type_of_drapery Male,Female TRUE -3.64 -1.810 -0.415 4.537 7.641 2 0.022 3 1.000
## 2 a_type_of_fabric_101 Male,Female TRUE -10.43 -8.602 -7.207 -2.255 14.433 2 0.001 4 0.161
## 3 a_type_of_fabric_98 Male,Female TRUE -5.98 -4.151 -2.756 2.196 9.982 2 0.007 5 1.000
## 4 a_type_of_hat Male,Female TRUE -8.22 -6.389 -4.994 -0.042 12.22 2 0.002 7 0.486
## 5 a_type_of_plant Male,Female TRUE -4.12 -2.289 -0.894 4.058 8.12 2 0.017 12 1.000
## 6 a_type_of_skirts Male,Female TRUE -14.63 -12.804 -11.409 -6.457 18.635 2 0 15 0.020
## 7 advocate Male,Female TRUE -2.41 -0.575 0.820 5.772 6.406 2 0.041 23 1.000
## 8 amazement Male,Female TRUE -4.64 -2.805 -1.410 3.542 8.636 2 0.013 25 1.000
## 9 auspices Male,Female TRUE -6.29 -4.454 -3.059 1.893 10.285 2 0.006 27 1.000
## 10 barely_able_to_read_and_write Male,Female TRUE -3.98 -2.149 -0.754 4.199 7.98 2 0.019 28 1.000
## 11 because Male,Female TRUE -4.75 -2.917 -1.522 3.430 8.748 2 0.013 29 1.000
## 12 celebration Male,Female TRUE -8.25 -6.419 -5.024 -0.072 12.25 2 0.002 38 0.479
## 13 colossal Male,Female TRUE -20.06 -18.229 -16.834 -11.882 24.06 2 0 43 0.001
## 14 convoy Male,Female TRUE -5.86 -4.028 -2.633 2.319 9.859 2 0.007 49 1.000
## 15 couch Male,Female TRUE -2.10 -0.274 1.121 6.073 6.105 2 0.047 50 1.000
## 16 drink Male,Female TRUE -8.87 -7.034 -5.639 -0.687 12.865 2 0.002 64 0.352
## 17 girl Male,Female TRUE -5.48 -3.646 -2.251 2.701 9.477 2 0.009 78 1.000
## 18 greed Male,Female TRUE -3.55 -1.720 -0.325 4.627 7.551 2 0.023 81 1.000
## 19 illness Male,Female TRUE -10.90 -9.073 -7.678 -2.726 14.904 2 0.001 89 0.127
## 20 intellectual Male,Female TRUE -4.14 -2.313 -0.918 4.034 8.144 2 0.017 92 1.000
## 21 nonsense Male,Female TRUE -4.39 -2.558 -1.163 3.789 8.389 2 0.015 100 1.000
## 22 pamper Male,Female TRUE -7.74 -5.908 -4.513 0.439 11.739 2 0.003 103 0.619
## 23 rebellious Male,Female TRUE -15.40 -13.565 -12.170 -7.218 19.396 2 0 111 0.013
## 24 referendum Male,Female TRUE -4.25 -2.421 -1.026 3.926 8.252 2 0.016 113 1.000
## 25 sketch Male,Female TRUE -4.11 -2.282 -0.887 4.065 8.113 2 0.017 125 1.000
## 26 somber Male,Female TRUE -5.06 -3.232 -1.837 3.115 9.063 2 0.011 129 1.000
## 27 sporadic Male,Female TRUE -4.60 -2.773 -1.378 3.574 8.604 2 0.014 131 1.000
## 28 stroll Male,Female TRUE -2.18 -0.351 1.044 5.996 6.182 2 0.045 135 1.000
## 29 stylish Male,Female TRUE -6.68 -4.850 -3.455 1.497 10.681 2 0.005 138 1.000
## 30 vulgar Male,Female TRUE -4.48 -2.649 -1.254 3.698 8.48 2 0.014 146 1.000
## 31 wave Male,Female TRUE -4.26 -2.433 -1.038 3.914 8.264 2 0.016 149 1.000
## 32 2of5_1 Male,Female TRUE -2.11 -0.279 1.116 6.068 6.11 2 0.047 155 1.000
## 33 2of5_5 Male,Female TRUE -6.08 -4.251 -2.856 2.096 10.082 2 0.006 159 1.000
## 34 2of5_12 Male,Female TRUE -4.92 -3.087 -1.692 3.260 8.918 2 0.012 166 1.000
## 35 2of5_13 Male,Female TRUE -2.65 -0.822 0.573 5.525 6.653 2 0.036 167 1.000
## 36 2of5_14 Male,Female TRUE -9.58 -7.753 -6.358 -1.406 13.584 2 0.001 168 0.246
## 37 2of5_17 Male,Female TRUE -5.99 -4.160 -2.765 2.188 9.991 2 0.007 171 1.000
## 38 2of5_30 Male,Female TRUE -5.65 -3.817 -2.422 2.530 9.648 2 0.008 183 1.000
## 39 2of5_31 Male,Female TRUE -3.33 -1.496 -0.101 4.851 7.327 2 0.026 184 1.000
## 40 3of5_8 Male,Female TRUE -6.22 -4.390 -2.995 1.957 10.221 2 0.006 198 1.000
## 41 3of5_10 Male,Female TRUE -2.90 -1.065 0.330 5.282 6.896 2 0.032 199 1.000
## 42 3of5_21 Male,Female TRUE -15.70 -13.866 -12.471 -7.519 19.697 2 0 208 0.012
## 43 3of5_27 Male,Female TRUE -3.29 -1.454 -0.059 4.893 7.285 2 0.026 214 1.000
## 44 3of5_28 Male,Female TRUE -2.43 -0.599 0.796 5.748 6.43 2 0.04 215 1.000
## 45 3of5_29 Male,Female TRUE -12.31 -10.474 -9.080 -4.127 16.306 2 0 216 0.063
sex_bias_fit$DIF_stats %>% filter(p_adj < .05)
## item groups converged AIC SABIC HQ BIC X2 df p number p_adj
## 1 a_type_of_skirts Male,Female TRUE -14.6 -12.8 -11.4 -6.46 18.635 2 0 15 0.020
## 2 colossal Male,Female TRUE -20.1 -18.2 -16.8 -11.88 24.06 2 0 43 0.001
## 3 rebellious Male,Female TRUE -15.4 -13.6 -12.2 -7.22 19.396 2 0 111 0.013
## 4 3of5_21 Male,Female TRUE -15.7 -13.9 -12.5 -7.52 19.697 2 0 208 0.012
sex_bias_fit$effect_size_test
## $liberal
## Effect Size Value
## 1 STDS 0.3903
## 2 UTDS 4.6722
## 3 UETSDS 0.4603
## 4 ETSSD 0.0119
## 5 Starks.DTFR 0.3253
## 6 UDTFR 4.4877
## 7 UETSDN 0.5160
## 8 theta.of.max.test.D -0.2722
## 9 Test.Dmax 0.8981
##
## $conservative
## Effect Size Value
## 1 STDS 0.04344
## 2 UTDS 0.60582
## 3 UETSDS 0.24699
## 4 ETSSD 0.00132
## 5 Starks.DTFR 0.05577
## 6 UDTFR 0.55640
## 7 UETSDN 0.25556
## 8 theta.of.max.test.D -1.46312
## 9 Test.Dmax -0.47072
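These test-level statistics (STDS, UTDS, ETSSD, and so on) are Meade's (2010) differential test functioning effect sizes as reported by mirt's empirical_ES(); presumably the equivalent direct call for the conservative anchoring is:
# test-level (DIF = FALSE) effect sizes for the anchored multigroup fit
empirical_ES(sex_bias_fit$fits$anchor_conservative, DIF = FALSE)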
sex_bias_fit$DIF_stats$item_number = seq_along_rows(sex_bias_fit$DIF_stats)
#plot item trace curves
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace")
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
save_plot_to_file({
sex_bias_fit$fits$anchor_conservative %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p_adj < .05) %>% pull(item_number))
}, filename = "figs/good items sex DIF.png")
sex_bias_fit$fits$anchor_liberal %>% plot(type = "trace", which.items = sex_bias_fit$DIF_stats %>% filter(p < .05) %>% pull(item_number))
#plot expected test scores to assess test-level bias
sex_bias_fit$fits$anchor_conservative %>% plot(type = "score")
#sex difference
SMD_matrix(d2$IQ, d2$sex)
## Male Female
## Male NA 0.0734
## Female 0.0734 NA
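SMD_matrix() reports standardized mean differences (Cohen's d). A minimal by-hand sketch (the package's exact pooling may differ in detail):
# minimal sketch of a pooled-SD standardized mean difference
smd_sketch = function(x, g) {
g = factor(g)
m = tapply(x, g, mean, na.rm = TRUE)
s = tapply(x, g, sd, na.rm = TRUE)
n = tapply(!is.na(x), g, sum)
sp = sqrt(((n[1] - 1) * s[1]^2 + (n[2] - 1) * s[2]^2) / (sum(n) - 2))
unname((m[1] - m[2]) / sp)
}
# smd_sketch(d2$IQ, d2$sex) should reproduce the 0.0734 above (sign depends on level order)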
describe2(d2$IQ, d2$sex)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Male ...1 199 99.4 98.7 16.2 17.1 62.0 149. 0.424 -0.183
## 2 Female ...1 242 98.2 96.9 14.8 16.2 67.0 146. 0.464 -0.434
var.test(IQ ~ sex, data = d2)
##
## F test to compare two variances
##
## data: IQ by sex
## F = 1, num df = 198, denom df = 241, p-value = 0.2
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.918 1.565
## sample estimates:
## ratio of variances
## 1.2
#item-level pass rate SD by sex
good_items_stats_sexes = itemstats(
all_items_scored %>% select(-id),
group = d2$sex
)
## Warning in cor(data, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
## Warning in cor(x, tsx, use = "pairwise.complete.obs"): the standard deviation is zero
bind_rows(
good_items_stats_sexes$Male$itemstats %>% mutate(sex = "Men"),
good_items_stats_sexes$Female$itemstats %>% mutate(sex = "Women")
) %>%
GG_denhist("sd", group = "sex")
## Input seems like a fraction, set `boundary=0` and `binwidth=1/30` to avoid issues near the limits. Disable this with `auto_fraction_bounary=F`
t.test(
good_items_stats_sexes$Male$itemstats$sd,
good_items_stats_sexes$Female$itemstats$sd
)
##
## Welch Two Sample t-test
##
## data: good_items_stats_sexes$Male$itemstats$sd and good_items_stats_sexes$Female$itemstats$sd
## t = 1, df = 441, p-value = 0.3
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.00878 0.03110
## sample estimates:
## mean of x mean of y
## 0.390 0.378
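Since items are scored 0/1, each item's SD is just sqrt(p(1 - p)) of its pass rate, so this compares pass-rate dispersion between the sexes. The per-item SDs can also be computed by hand:
# by-hand version of the per-item SDs compared above (rows of
# all_items_scored are assumed to align with d2, as in the itemstats call)
item_sds_by_sex = all_items_scored %>%
select(-id) %>%
mutate(sex = d2$sex) %>%
pivot_longer(-sex, names_to = "item", values_to = "score") %>%
group_by(sex, item) %>%
summarise(sd = sd(score, na.rm = TRUE), .groups = "drop")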
#white subset
d2_white = d2 %>% filter(race_combos_common == "white")
SMD_matrix(d2_white$IQ, d2_white$sex)
## Male Female
## Male NA 0.163
## Female 0.163 NA
describe2(d2_white$IQ, d2_white$sex)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Male ...1 148 101. 99.8 15.8 16.2 68.2 142. 0.252 -0.422
## 2 Female ...1 179 98.9 97.6 14.3 15.9 71.1 146. 0.511 -0.310
var.test(IQ ~ sex, data = d2_white)
##
## F test to compare two variances
##
## data: IQ by sex
## F = 1, num df = 147, denom df = 178, p-value = 0.2
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.892 1.658
## sample estimates:
## ratio of variances
## 1.21
d2 %>%
filter(race_combos_common == "white") %>%
GG_denhist("IQ", "sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Abbreviation
max_items = 50
max_cycles = 2000
plan(multisession(workers = 7))
#filter out items with extreme pass rates (keep those between .05 and .95)
good_items_scored_stats = good_items_fit %>% get_mirt_stats()
good_items_passrate_filter = good_items_scored %>% select(all_of(good_items_scored_stats %>% filter(is_between(pass_rate, .05, .95)) %>% pull(item)))
ncol(good_items_passrate_filter)
## [1] 100
#full fit for filtered items
good_items_passrate_filter_fit = cache_object(filename = "data/good_items_passrate_filter_fit.rds", expr = mirt(
good_items_passrate_filter,
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = max_cycles)
), renew = renew_all)
## Cache found, reading object from disk
#make norms for the 100-item version
norms_100 = make_norms(
score = good_items_passrate_filter_fit %>% fscores(full.scores.SE = T) %>% extract(, 1),
age = d2$age,
norm_group = d2$white_only,
p_value = .05
)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.003**). Model used.
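make_norms() is a kirkegaard helper; conceptually, this kind of age norming removes the age trend estimated in the norm group (here whites) and rescales to the IQ metric. A minimal sketch assuming only a linear mean effect (per the messages above, the real function also models an age effect on the variance):
# minimal sketch of age norming to mean 100, SD 15 in the norm group;
# not the actual make_norms() implementation
norm_sketch = function(score, age, norm_group) {
fit = lm(score ~ age, subset = norm_group)
resid_all = score - predict(fit, newdata = data.frame(age = age))
100 + 15 * resid_all / sd(resid_all[norm_group], na.rm = TRUE)
}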
#forward selection using the rc criterion (reliability + correlation with full score)
vocab_abbrev_forward = cache_object(filename = "data/vocab_abbrev_rc50.rds", expr = {
abbreviate_scale(
items = good_items_passrate_filter,
item_target = max_items,
method = "forwards",
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
}, renew = renew_all)
## Cache found, reading object from disk
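The forwards method is a greedy search: at each step it refits the model for every candidate addition and keeps the item that maximizes the rc criterion (the average of reliability and the correlation with the full-scale score). A schematic of the loop, with score_set() as a placeholder for the expensive fit-and-score step:
# schematic greedy forward selection; score_set() is a placeholder for
# fitting a 2PL on the subset and returning the rc criterion
forward_select = function(items, item_target, score_set) {
chosen = character(0)
pool = names(items)
while (length(chosen) < item_target) {
crit = sapply(pool, function(i) score_set(items[c(chosen, i)]))
best = pool[which.max(crit)]
chosen = c(chosen, best)
pool = setdiff(pool, best)
}
chosen
}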
#max loading
#basic (no balancing or residualization)
max_loading_basic = abbreviate_scale(
items = good_items_passrate_filter,
item_target = max_items,
method = "max_loading",
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with criterion variable(s) and reliability method
## Using the max_loading method
## 67.37 sec elapsed
#difficulty balancing
max_loading_balanced = abbreviate_scale(
items = good_items_passrate_filter,
item_target = max_items,
method = "max_loading",
difficulty_balance_groups = 5,
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with criterion variable(s) and reliability method
## Using the max_loading method
## 30.87 sec elapsed
#residualization
max_loading_resid = abbreviate_scale(
items = good_items_passrate_filter,
item_target = max_items,
method = "max_loading",
residualize_loadings = T,
selection_method = "rc",
mirt_args = list(
model = 1,
itemtype = "2PL",
verbose = F,
technical = list(NCYCLES = max_cycles)
)
)
## Abbreviating scale using average of correlation with criterion variable(s) and reliability method
## Using the max_loading method
## 29.92 sec elapsed
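The max_loading variants avoid the repeated refitting above: the model is fit once and items are ranked by discrimination, with difficulty_balance_groups spreading the picks across difficulty bins and residualize_loadings adjusting the loadings for difficulty first (assumed behavior; see the kirkegaard documentation). A minimal sketch of the basic variant using mirt's coef():
# minimal sketch of the basic max-loading idea: one full fit, then keep the
# max_items items with the largest 2PL discriminations (a1)
pars = coef(good_items_passrate_filter_fit, simplify = TRUE)$items
top_items = rownames(pars)[order(pars[, "a1"], decreasing = TRUE)][1:max_items]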
#plot results
abbrev_results = bind_rows(
vocab_abbrev_forward$best_sets %>% mutate(method = "step forward"),
max_loading_basic$best_sets %>% mutate(method = "max loading, basic"),
max_loading_balanced$best_sets %>% mutate(method = "max loading, balanced"),
max_loading_resid$best_sets %>% mutate(method = "max loading, resid")
) %>%
select(reliability, r_full_score, method, items_in_scale, criterion_value) %>%
pivot_longer(
cols = c("reliability", "r_full_score", "criterion_value"),
names_to = "criterion",
values_to = "value"
) %>%
mutate(
criterion = case_when(
criterion == "reliability" ~ "Reliability",
criterion == "r_full_score" ~ "Cor. with full score",
criterion == "criterion_value" ~ "Combined index"
)
)
#plot it
abbrev_results %>%
ggplot(aes(items_in_scale, value, color = method)) +
geom_line() +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .1)) +
# geom_point() +
facet_wrap("criterion")
GG_save("figs/abbreviation_comparison.png")
#make norms for abbreviated scales
abbrev_scales_items = seq(10, 50, 5)
abbrev_scales_norms = map(abbrev_scales_items, function(item_count) {
make_norms(
score = vocab_abbrev_forward$best_sets %>% filter(items_in_scale == item_count) %>% extract2("scores") %>% extract2(1) %>% extract(, 1),
age = d2$age,
norm_group = d2$white_only,
p_value = .05
)
}) %>% set_names("scale_" + abbrev_scales_items)
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.043). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.016). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.002**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = <0.001***). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.001**). Model used.
## Detected linear effect of age on the score (p = <0.001***). Model used.
## Detected variance effect of age on the score (p = 0.002**). Model used.
#add the 100-item version
abbrev_scales_norms$scale_100 = norms_100
abbrev_scales_norms %>%
write_rds("data/abbrev_scales_norms.rds", compress = "xz")
#move scores into main dataset
for (scale in abbrev_scales_items) {
d2[["vocab_IQ_" + scale]] = abbrev_scales_norms[["scale_" + scale]]$data$IQ
}
#verify that age norming was done correctly by checking for age effects and white mean/SD
describe2(d2$vocab_IQ_30, d2$white_only)
## New names:
## • `` -> `...1`
## # A tibble: 2 × 11
## group var n mean median sd mad min max skew kurtosis
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE ...1 114 97.0 93.9 16.0 15.3 60.3 141. 0.433 -0.215
## 2 TRUE ...1 327 100 99.0 15 17.0 66.9 149. 0.296 -0.522
#linear effect of age?
GG_scatter(d2, "age", "vocab_IQ_30")
## `geom_smooth()` using formula = 'y ~ x'
#and check that there is no heteroscedasticity
test_HS(d2$vocab_IQ_30, d2$age)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## # A tibble: 4 × 5
## test r2adj p fit log10_p
## <chr> <dbl> <dbl> <named list> <dbl>
## 1 linear raw -0.000649 0.397 <ols> 0.401
## 2 spline raw -0.00451 0.723 <ols> 0.141
## 3 linear rank -0.00149 0.556 <ols> 0.255
## 4 spline rank -0.00306 0.506 <ols> 0.296
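test_HS() is a kirkegaard helper that fits linear and spline models to raw and rank-transformed scores. A quick cross-check with a standard Breusch-Pagan test (lmtest package, assumed installed) would be:
# hypothetical cross-check: Breusch-Pagan test for age-related
# heteroscedasticity in the normed scores
m_age = lm(vocab_IQ_30 ~ age, data = d2)
lmtest::bptest(m_age)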
#compare reliability plots for full and abbreviated scales
scale_reliabilities = map_df(c(-1, -2, abbrev_scales_items), function(item_count) {
# item_count of -1 means the full fit on all good items, -2 the pass-rate-filtered fit
#otherwise get the abbreviated scale of that size
if (item_count == -1) {
fit = all_items_fit
item_count = good_items_scored %>% select(-id) %>% ncol()
} else if (item_count == -2) {
fit = good_items_passrate_filter_fit
item_count = good_items_passrate_filter %>% ncol()
} else {
fit = vocab_abbrev_forward$best_sets %>% filter(items_in_scale == item_count) %>% extract2("fit") %>% extract2(1) %>% extract2("fit")
}
#get reliabilities
rels = get_reliabilities(fit) %>%
mutate(items_in_scale = item_count)
rels
})
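get_reliabilities() presumably derives conditional reliability from the test information function, rel(z) = I(z) / (I(z) + 1) under a unit-variance latent trait. A sketch with mirt's testinfo():
# sketch: conditional reliability from test information on a z grid
z_grid = seq(-4, 4, .1)
info = testinfo(good_items_passrate_filter_fit, Theta = matrix(z_grid))
rel_sketch = tibble(z = z_grid, rel = info / (info + 1))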
#make a joint plot
scale_reliabilities %>%
mutate(
items_in_scale = as.factor(items_in_scale)
) %>%
ggplot(aes(z, rel, color = items_in_scale)) +
geom_line() +
scale_y_continuous("Reliability", breaks = seq(0, 1, .05)) +
scale_x_continuous("Ability level (z)") +
coord_cartesian(xlim = c(-4, 4)) +
scale_color_discrete("Items in scale")
GG_save("figs/reliability comparison.png")
#overall reliability as a function of item count
scale_reliabilities_empirical = bind_rows(
vocab_abbrev_forward$best_sets %>%
select(items_in_scale, reliability),
tibble(
items_in_scale = good_items_passrate_filter %>% ncol(),
reliability = empirical_rxx(fscores(good_items_passrate_filter_fit, full.scores.SE = T))
),
tibble(
items_in_scale = good_items_scored %>% select(-id) %>% ncol(),
reliability = empirical_rxx(fscores(good_items_fit, full.scores.SE = T))
)
)
scale_reliabilities_empirical %>%
print(n = Inf)
## # A tibble: 50 × 2
## items_in_scale reliability
## <int> <dbl>
## 1 3 0.658
## 2 4 0.707
## 3 5 0.733
## 4 6 0.757
## 5 7 0.779
## 6 8 0.795
## 7 9 0.811
## 8 10 0.821
## 9 11 0.831
## 10 12 0.840
## 11 13 0.848
## 12 14 0.856
## 13 15 0.862
## 14 16 0.867
## 15 17 0.873
## 16 18 0.877
## 17 19 0.882
## 18 20 0.886
## 19 21 0.890
## 20 22 0.893
## 21 23 0.896
## 22 24 0.900
## 23 25 0.902
## 24 26 0.904
## 25 27 0.906
## 26 28 0.908
## 27 29 0.910
## 28 30 0.913
## 29 31 0.914
## 30 32 0.916
## 31 33 0.918
## 32 34 0.920
## 33 35 0.921
## 34 36 0.923
## 35 37 0.925
## 36 38 0.926
## 37 39 0.927
## 38 40 0.928
## 39 41 0.929
## 40 42 0.930
## 41 43 0.931
## 42 44 0.932
## 43 45 0.933
## 44 46 0.935
## 45 47 0.935
## 46 48 0.936
## 47 49 0.937
## 48 50 0.937
## 49 100 0.954
## 50 219 0.972
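For reference, mirt's empirical_rxx() computes the empirical reliability from the factor scores and their standard errors, essentially var(theta_hat) / (var(theta_hat) + mean(SE^2)); for the 100-item fit:
# by-hand version of the empirical reliability for the 100-item fit
fs = fscores(good_items_passrate_filter_fit, full.scores.SE = TRUE)
var(fs[, "F1"]) / (var(fs[, "F1"]) + mean(fs[, "SE_F1"]^2))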
#reliability range as a function of item count
scale_reliability_ranges = map_df(scale_reliabilities$items_in_scale %>% unique(), \(x) {
scale_reliabilities %>%
filter(items_in_scale == x) %>%
reliability_range(min_reliability = c(.80, .85, .90)) %>%
mutate(items_in_scale = x)
}) %>%
left_join(
scale_reliabilities_empirical
) %>%
arrange(items_in_scale)
## Joining with `by = join_by(items_in_scale)`
scale_reliability_ranges %>%
print(n = Inf)
## # A tibble: 33 × 6
## min_reliability lower_z upper_z coverage items_in_scale reliability
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.8 -1.24 0.814 0.684 10 0.821
## 2 0.85 -1.06 0.513 0.550 10 0.821
## 3 0.9 -0.693 0.0905 0.292 10 0.821
## 4 0.8 -1.42 1.18 0.802 15 0.862
## 5 0.85 -1.24 0.874 0.701 15 0.862
## 6 0.9 -0.935 0.392 0.477 15 0.862
## 7 0.8 -1.60 1.42 0.867 20 0.886
## 8 0.85 -1.42 1.12 0.789 20 0.886
## 9 0.9 -1.12 0.693 0.624 20 0.886
## 10 0.8 -1.78 1.54 0.900 25 0.902
## 11 0.85 -1.54 1.24 0.830 25 0.902
## 12 0.9 -1.24 0.814 0.684 25 0.902
## 13 0.8 -1.90 1.72 0.928 30 0.913
## 14 0.85 -1.66 1.42 0.873 30 0.913
## 15 0.9 -1.36 0.995 0.753 30 0.913
## 16 0.8 -2.02 1.84 0.945 35 0.921
## 17 0.85 -1.78 1.54 0.900 35 0.921
## 18 0.9 -1.48 1.12 0.798 35 0.921
## 19 0.8 -2.14 1.96 0.959 40 0.928
## 20 0.85 -1.90 1.66 0.923 40 0.928
## 21 0.9 -1.54 1.24 0.830 40 0.928
## 22 0.8 -2.26 2.14 0.972 45 0.933
## 23 0.85 -2.02 1.78 0.941 45 0.933
## 24 0.9 -1.66 1.30 0.854 45 0.933
## 25 0.8 -2.38 2.20 0.978 50 0.937
## 26 0.85 -2.14 1.84 0.951 50 0.937
## 27 0.9 -1.78 1.36 0.875 50 0.937
## 28 0.8 -3.11 2.98 0.998 100 0.954
## 29 0.85 -2.74 2.56 0.992 100 0.954
## 30 0.9 -2.32 2.02 0.968 100 0.954
## 31 0.8 -4.55 3.41 1.00 219 0.972
## 32 0.85 -4.25 2.92 0.998 219 0.972
## 33 0.9 -3.89 2.32 0.990 219 0.972
#plot reliability coverage by item count and minimum reliability
scale_reliability_ranges %>%
mutate(
z = 1
) %>%
ggplot(aes(z, factor(items_in_scale), color = factor(min_reliability))) +
geom_errorbarh(aes(xmin = lower_z, xmax = upper_z), position = "dodge") +
geom_label(aes(x = 0, label = scales::label_percent(1)(coverage)), position = position_dodge(width = 1), size = 3) +
scale_color_discrete("Minimum reliability") +
scale_y_discrete("Items in scale") +
scale_x_continuous("Coverage of a normal distribution")
GG_save("figs/reliability coverage.png")
#50-item version
(abbrev_50_names = names(good_items_passrate_filter)[vocab_abbrev_forward$best_sets %>% tail(1) %>% pull(item_set) %>% extract2(1)] %>% gtools::mixedsort())
## [1] "1of5_1" "2of5_2" "2of5_3" "2of5_4" "2of5_6"
## [6] "2of5_8" "2of5_9" "2of5_16" "2of5_17" "2of5_18"
## [11] "2of5_22" "2of5_25" "2of5_26" "2of5_27" "2of5_31"
## [16] "3of5_4" "3of5_6" "3of5_11" "3of5_20" "3of5_25"
## [21] "3of5_26" "a_type_of_hat" "a_type_of_overshoe" "a_type_of_wavy_form" "auspices"
## [26] "bow" "brief" "diatribes" "divergence" "entanglement"
## [31] "environment" "excite" "greed" "impromptu" "lackadaisical"
## [36] "meal" "pamper" "referendum" "relevant" "sag"
## [41] "schemer" "sensitivity" "shadows" "sketch" "sluggish"
## [46] "somber" "sporadic" "stylish" "tyrant" "vulgar"
#100-item version (all good items with acceptable pass rates)
(abbrev_100_names = names(good_items_passrate_filter) %>% gtools::mixedsort())
## [1] "1of5_1" "1of5_2" "1of5_3" "1of5_4"
## [5] "2of5_1" "2of5_2" "2of5_3" "2of5_4"
## [9] "2of5_5" "2of5_6" "2of5_7" "2of5_8"
## [13] "2of5_9" "2of5_10" "2of5_11" "2of5_12"
## [17] "2of5_13" "2of5_15" "2of5_16" "2of5_17"
## [21] "2of5_18" "2of5_19" "2of5_20" "2of5_21"
## [25] "2of5_22" "2of5_23" "2of5_25" "2of5_26"
## [29] "2of5_27" "2of5_28" "2of5_29" "2of5_30"
## [33] "2of5_31" "2of5_33" "2of5_34" "2of5_35"
## [37] "3of5_1" "3of5_4" "3of5_5" "3of5_6"
## [41] "3of5_7" "3of5_11" "3of5_12" "3of5_14"
## [45] "3of5_16" "3of5_17" "3of5_18" "3of5_20"
## [49] "3of5_21" "3of5_22" "3of5_24" "3of5_25"
## [53] "3of5_26" "3of5_28" "3of5_30" "3of5_31"
## [57] "3of5_32" "a_type_of_hat" "a_type_of_overshoe" "a_type_of_smokeless_powder"
## [61] "a_type_of_sound" "a_type_of_sword" "a_type_of_wavy_form" "auspices"
## [65] "bow" "brief" "couch" "deceit"
## [69] "diatribes" "disjoined" "divergence" "dome"
## [73] "entanglement" "environment" "excite" "flatteries"
## [77] "greed" "illness" "impromptu" "lackadaisical"
## [81] "meal" "pamper" "referendum" "relating_to_the_right"
## [85] "relevant" "sag" "schemer" "sensitivity"
## [89] "shadows" "sketch" "slave" "sluggish"
## [93] "somber" "sporadic" "stylish" "transportation"
## [97] "tyrant" "unhealthful" "vulgar" "wandering"
#write main data to file for reuse
d2 %>% write_rds("data/main data.rds", compress = "xz")
#save scored items
good_items_scored %>% write_rds("data/item data.rds", compress = "xz")
#save norms
vocab_norms %>% write_rds("data/vocab norms.rds", compress = "xz")
abbrev_scales_norms %>% write_rds("data/vocab abbrev norms.rds", compress = "xz")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/6gcy4/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figs", "vocab.Rmd", "vocab.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}