Data source
Authors shared data from:
Init
#print numbers with 3 significant digits
options(digits = 3)
#pacman supplies p_load(), which is used to load the packages for this analysis
library(pacman)
p_load(kirkegaard, haven, rms, mirt)
Data
#load the SPSS (.sav) data file
d = haven::read_sav("data/GeneralDataset_SLATINT_Emil 2019.sav")
#build the variable overview table and display it
var_table = d %>% df_var_table()
var_table
IRT
#selected items: every column starting with PISA, plus SPM columns whose name ends in a digit
select_items = c(str_subset(names(d), "^PISA"), str_subset(names(d), "^SPM.+?\\d$"))
#excluded cases, without any data at all
#(presumably miss_by_case(prop = TRUE) gives the per-case proportion of missing values, so 1 = nothing answered)
excluded_cases = ((d[select_items] %>% miss_by_case(prop = TRUE)) == 1)
d2 = d[!excluded_cases, ]
#remove items without enough variance
#per-item mean over the non-missing responses, used as the pass rate
#NOTE(review): this reads as a pass rate only for 0/1 coded items; mirt reports re-scoring
#an item when fitting, so confirm the coding of all selected items
select_item_pass_rates = d2[select_items] %>% colMeans(na.rm = TRUE)
#which() drops items whose pass rate is NaN (no responses left in d2); without it the NA
#would end up in the item vector and break the column selection below
select_items2 = select_items[which(select_item_pass_rates > .01 & select_item_pass_rates < .99)]
#simple IRT model: a single factor, with fitting progress messages turned off
irt_g = mirt::mirt(d2[select_items2], model = 1, verbose = FALSE)
## Item re-scored so that all values are within a distance of 1
#loadings on F1 and communalities (h2); the summary call prints the table and the result is kept as well
irt_g_summary = summary(irt_g)
## F1 h2
## PISA1 0.479 0.2290
## PISA2 0.569 0.3236
## PISA3 0.594 0.3528
## PISA4 0.552 0.3046
## PISA5 0.495 0.2453
## PISA6 0.474 0.2247
## PISA7 0.426 0.1816
## PISA8 0.651 0.4236
## PISA9 0.668 0.4458
## PISA10 0.656 0.4300
## PISA11 0.522 0.2726
## PISA12 0.438 0.1917
## PISA13 0.452 0.2042
## PISA14 0.526 0.2764
## PISA15 0.415 0.1726
## PISA16 0.410 0.1680
## SPM_A5 0.438 0.1917
## SPM_A7 0.676 0.4574
## SPM_A8 0.482 0.2325
## SPM_A9 0.650 0.4222
## SPM_A10 0.544 0.2959
## SPM_A11 0.525 0.2755
## SPM_A12 0.448 0.2005
## SPM_B2 0.570 0.3245
## SPM_B3 0.739 0.5462
## SPM_B4 0.665 0.4424
## SPM_B5 0.664 0.4413
## SPM_B6 0.535 0.2862
## SPM_B7 0.464 0.2151
## SPM_B8 0.512 0.2625
## SPM_B9 0.609 0.3708
## SPM_B10 0.737 0.5430
## SPM_B11 0.552 0.3047
## SPM_B12 0.553 0.3060
## SPM_C1 0.681 0.4643
## SPM_C2 0.655 0.4291
## SPM_C3 0.603 0.3634
## SPM_C4 0.508 0.2578
## SPM_C5 0.643 0.4129
## SPM_C6 0.555 0.3082
## SPM_C7 0.782 0.6113
## SPM_C8 0.522 0.2722
## SPM_C9 0.543 0.2948
## SPM_C10 0.504 0.2540
## SPM_C11 0.526 0.2765
## SPM_C12 0.577 0.3328
## SPM_D1 0.684 0.4673
## SPM_D2 0.667 0.4451
## SPM_D3 0.684 0.4684
## SPM_D4 0.645 0.4158
## SPM_D5 0.695 0.4825
## SPM_D6 0.652 0.4256
## SPM_D7 0.576 0.3323
## SPM_D8 0.479 0.2295
## SPM_D9 0.598 0.3572
## SPM_D10 0.734 0.5385
## SPM_D11 0.312 0.0976
## SPM_D12 0.371 0.1376
## SPM_E1 0.649 0.4217
## SPM_E2 0.611 0.3736
## SPM_E3 0.697 0.4852
## SPM_E4 0.753 0.5676
## SPM_E5 0.790 0.6247
## SPM_E6 0.675 0.4561
## SPM_E7 0.499 0.2493
## SPM_E8 0.644 0.4151
## SPM_E9 0.551 0.3040
## SPM_E10 0.582 0.3392
## SPM_E11 0.515 0.2653
## SPM_E12 0.362 0.1310
##
## SS loadings: 23.9
## Proportion Var: 0.341
##
## Factor correlations:
##
## F1
## F1 1
#factor score estimates from the fitted model
irt_g_scores = mirt::fscores(irt_g)
#keep the first score column, standardize it, and store it with the data
d2$irt_g = standardize(irt_g_scores[, 1])
Simple results
#sex
#distribution of irt_g, grouped by Sex (presumably a density + histogram plot from kirkegaard; confirm)
GG_denhist(d2, "irt_g", "Sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#distribution of irt_g, grouped by SES_school
#NOTE(review): this comment previously said "birth order", but the grouping variable is SES_school;
#confirm whether the label or the variable was the intended one
GG_denhist(d2, "irt_g", "SES_school")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#age
#scatter plot of Age and irt_g (presumably Age on the x axis and irt_g on the y axis; confirm)
GG_scatter(d2, "Age", "irt_g")
