Data source

Authors shared data from:

Init

# Print numeric output with 3 significant digits
options(digits = 3)
# pacman supplies p_load(), which attaches the listed packages
# (installing any that are missing)
library(pacman)
# haven provides read_sav(); mirt provides the IRT model and factor scores.
# The remaining helpers used below (df_var_table, miss_by_case, standardize,
# GG_denhist, GG_scatter, and the %>% / str_subset re-exports) presumably come
# from kirkegaard -- TODO confirm. rms is not used in the visible section.
p_load(kirkegaard, haven, rms, mirt)

Data

#load the SPSS (.sav) dataset
d = "data/GeneralDataset_SLATINT_Emil 2019.sav" %>% read_sav()

#build the variable overview table from the loaded data
var_table = d %>% df_var_table()

#show the table
var_table

IRT

#selected items: every column whose name starts with "PISA", plus columns
#whose name starts with "SPM" and ends in a digit
select_items = c(
  str_subset(names(d), "^PISA"),
  str_subset(names(d), "^SPM.+?\\d$")
)

#excluded cases, without any data at all on the selected items
#(miss_by_case(prop = TRUE) is expected to return each row's proportion of
#missing values -- confirm against the kirkegaard documentation)
excluded_cases = ((d[select_items] %>% miss_by_case(prop = TRUE)) == 1)
d2 = d[!excluded_cases, ]

#remove items without enough variance: keep items whose mean lies strictly
#between .01 and .99
select_item_pass_rates = d2[select_items] %>% colMeans(na.rm = TRUE)
#which() discards NA comparisons (an item with no observed responses has a
#NaN mean), so such items are dropped instead of inserting an NA column name
select_items2 = select_items[which(select_item_pass_rates > .01 & select_item_pass_rates < .99)]

#simple IRT model: single factor (model = 1), fitted without progress output
irt_g = mirt::mirt(d2[select_items2], model = 1, verbose = FALSE)
## Item re-scored so that all values are within a distance of 1
#print the factor loadings / communalities and keep the summary object
irt_g_summary = summary(irt_g)
##            F1     h2
## PISA1   0.479 0.2290
## PISA2   0.569 0.3236
## PISA3   0.594 0.3528
## PISA4   0.552 0.3046
## PISA5   0.495 0.2453
## PISA6   0.474 0.2247
## PISA7   0.426 0.1816
## PISA8   0.651 0.4236
## PISA9   0.668 0.4458
## PISA10  0.656 0.4300
## PISA11  0.522 0.2726
## PISA12  0.438 0.1917
## PISA13  0.452 0.2042
## PISA14  0.526 0.2764
## PISA15  0.415 0.1726
## PISA16  0.410 0.1680
## SPM_A5  0.438 0.1917
## SPM_A7  0.676 0.4574
## SPM_A8  0.482 0.2325
## SPM_A9  0.650 0.4222
## SPM_A10 0.544 0.2959
## SPM_A11 0.525 0.2755
## SPM_A12 0.448 0.2005
## SPM_B2  0.570 0.3245
## SPM_B3  0.739 0.5462
## SPM_B4  0.665 0.4424
## SPM_B5  0.664 0.4413
## SPM_B6  0.535 0.2862
## SPM_B7  0.464 0.2151
## SPM_B8  0.512 0.2625
## SPM_B9  0.609 0.3708
## SPM_B10 0.737 0.5430
## SPM_B11 0.552 0.3047
## SPM_B12 0.553 0.3060
## SPM_C1  0.681 0.4643
## SPM_C2  0.655 0.4291
## SPM_C3  0.603 0.3634
## SPM_C4  0.508 0.2578
## SPM_C5  0.643 0.4129
## SPM_C6  0.555 0.3082
## SPM_C7  0.782 0.6113
## SPM_C8  0.522 0.2722
## SPM_C9  0.543 0.2948
## SPM_C10 0.504 0.2540
## SPM_C11 0.526 0.2765
## SPM_C12 0.577 0.3328
## SPM_D1  0.684 0.4673
## SPM_D2  0.667 0.4451
## SPM_D3  0.684 0.4684
## SPM_D4  0.645 0.4158
## SPM_D5  0.695 0.4825
## SPM_D6  0.652 0.4256
## SPM_D7  0.576 0.3323
## SPM_D8  0.479 0.2295
## SPM_D9  0.598 0.3572
## SPM_D10 0.734 0.5385
## SPM_D11 0.312 0.0976
## SPM_D12 0.371 0.1376
## SPM_E1  0.649 0.4217
## SPM_E2  0.611 0.3736
## SPM_E3  0.697 0.4852
## SPM_E4  0.753 0.5676
## SPM_E5  0.790 0.6247
## SPM_E6  0.675 0.4561
## SPM_E7  0.499 0.2493
## SPM_E8  0.644 0.4151
## SPM_E9  0.551 0.3040
## SPM_E10 0.582 0.3392
## SPM_E11 0.515 0.2653
## SPM_E12 0.362 0.1310
## 
## SS loadings:  23.9 
## Proportion Var:  0.341 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#factor score estimates from the fitted model
irt_g_scores = mirt::fscores(irt_g)
#store the first (only) factor's scores, standardized, on the analysis data
d2$irt_g = standardize(irt_g_scores[, 1])

Simple results

#sex: distribution plot of irt_g with Sex as the third argument
#(presumably the grouping variable -- confirm against GG_denhist docs)
GG_denhist(d2, "irt_g", "Sex")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#school SES: same plot of irt_g, this time with SES_school as the third argument
GG_denhist(d2, "irt_g", "SES_school")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#age: scatter plot of Age and irt_g
GG_scatter(d2, "Age", "irt_g")