Init
options(digits = 2)
library(pacman)
p_load(kirkegaard, readr, ltm, rms)
#theme design
theme_large = theme(text = element_text(size = 15))
Data
d = readxl::read_xlsx("data/WFK04.xlsx") %>% na.omit()
#fix names
item_names = "item_" + 1:75
names(d) = c("item", item_names, "offspring")
#scores
cog_items = d[item_names]
#item data
fertility_data = readxl::read_xlsx("data/Fertility-pass rate.xlsx")
#recoded offspring
d$offspring_ordinal = ordered(d$offspring)
table2(d$offspring)
Main IRT analysis
#item x fertility relations
item_fertility = map_dbl(item_names, function(x) {
#extract item
d[c(x, "offspring")] %>%
#as normal data frame
as.data.frame %>%
#correlate
polycor::hetcor() %>%
#extract r
.[[1]] %>% .[1, 2]
})
#ordinal model, alternative
item_fertility_ordinal = map_dbl(item_names, function(x) {
#extract item
d_ = d[c(x, "offspring")] %>%
#as normal data frame
as.data.frame
colnames(d_) = c("item", "offspring")
#model
rms::orm(item ~ offspring, data = d_) %>% coef %>% .[2]
})
#compare methods
data_frame(polychoric = item_fertility, ordinal = item_fertility_ordinal) %>%
GG_scatter("polychoric", "ordinal") +
theme_large

#standard errors
item_fertility_se = map_dbl(item_names, function(x) {
#extract and calculate and extract
d[c(x, "offspring")] %>%
#as normal data frame
as.data.frame() %>%
#correlate
polycor::hetcor() %>%
#extract r
.[["std.errors"]] %>% .[1, 2]
})
#fit IRT fa
irt_fa = irt.fa(cog_items)
## Warning in cor.smooth(mat): Matrix was not positive definite, smoothing was
## done
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.

irt_fa
## Item Response Analysis using Factor Analysis
##
## Call: irt.fa(x = cog_items)
## Item Response Analysis using Factor Analysis
##
## Summary information by factor and item
## Factor = 1
## -3 -2 -1 0 1 2 3
## item_1 0.09 0.08 0.07 0.05 0.03 0.02 0.01
## item_2 0.28 0.24 0.13 0.05 0.02 0.01 0.00
## item_4 0.24 0.20 0.12 0.05 0.02 0.01 0.00
## item_5 0.34 0.26 0.12 0.04 0.01 0.00 0.00
## item_6 0.24 0.21 0.12 0.05 0.02 0.01 0.00
## item_7 0.21 0.17 0.10 0.05 0.02 0.01 0.00
## item_8 0.89 0.37 0.07 0.01 0.00 0.00 0.00
## item_9 0.93 0.33 0.06 0.01 0.00 0.00 0.00
## item_10 0.45 0.37 0.15 0.04 0.01 0.00 0.00
## item_11 0.49 0.42 0.16 0.04 0.01 0.00 0.00
## item_12 0.25 0.57 0.44 0.15 0.03 0.01 0.00
## item_13 0.39 0.34 0.15 0.05 0.01 0.00 0.00
## item_14 0.65 0.52 0.15 0.03 0.01 0.00 0.00
## item_15 0.33 0.47 0.28 0.09 0.03 0.01 0.00
## item_16 0.36 0.77 0.41 0.10 0.02 0.00 0.00
## item_17 0.55 0.82 0.28 0.05 0.01 0.00 0.00
## item_18 0.29 0.39 0.25 0.10 0.03 0.01 0.00
## item_19 0.23 0.45 0.38 0.15 0.05 0.01 0.00
## item_20 0.27 0.66 0.48 0.14 0.03 0.01 0.00
## item_21 0.71 0.62 0.16 0.03 0.00 0.00 0.00
## item_22 0.08 0.33 0.68 0.41 0.11 0.02 0.00
## item_23 0.23 0.36 0.28 0.13 0.04 0.01 0.00
## item_24 0.24 0.74 0.58 0.15 0.03 0.00 0.00
## item_25 0.13 0.29 0.35 0.22 0.09 0.03 0.01
## item_26 0.31 1.11 0.58 0.09 0.01 0.00 0.00
## item_27 0.25 0.60 0.48 0.15 0.03 0.01 0.00
## item_28 0.22 0.59 0.53 0.17 0.04 0.01 0.00
## item_29 0.18 0.40 0.41 0.19 0.06 0.02 0.00
## item_30 0.34 0.84 0.46 0.10 0.02 0.00 0.00
## item_31 0.10 0.27 0.42 0.29 0.11 0.03 0.01
## item_32 0.08 0.26 0.49 0.38 0.14 0.04 0.01
## item_33 0.06 0.13 0.22 0.24 0.16 0.08 0.03
## item_34 0.12 0.27 0.35 0.24 0.10 0.03 0.01
## item_35 0.03 0.07 0.13 0.19 0.20 0.14 0.07
## item_36 0.10 0.27 0.44 0.31 0.12 0.04 0.01
## item_37 0.08 0.32 0.67 0.42 0.11 0.02 0.00
## item_38 0.05 0.15 0.33 0.39 0.22 0.08 0.02
## item_39 0.10 0.29 0.46 0.31 0.11 0.03 0.01
## item_40 0.09 0.36 0.66 0.37 0.10 0.02 0.00
## item_41 0.02 0.15 0.65 0.81 0.23 0.04 0.01
## item_43 0.04 0.11 0.25 0.36 0.26 0.11 0.04
## item_44 0.03 0.16 0.63 0.76 0.23 0.04 0.01
## item_45 0.07 0.33 0.74 0.44 0.11 0.02 0.00
## item_46 0.06 0.09 0.12 0.13 0.10 0.07 0.04
## item_47 0.04 0.07 0.10 0.12 0.11 0.08 0.05
## item_48 0.03 0.08 0.19 0.30 0.26 0.14 0.06
## item_50 0.01 0.06 0.25 0.64 0.50 0.15 0.03
## item_53 0.05 0.15 0.29 0.33 0.20 0.08 0.03
## item_54 0.04 0.05 0.07 0.08 0.08 0.07 0.05
## item_55 0.03 0.09 0.24 0.42 0.33 0.14 0.04
## item_57 0.03 0.06 0.11 0.14 0.14 0.11 0.07
## item_58 0.03 0.06 0.10 0.13 0.13 0.10 0.07
## item_59 0.03 0.06 0.11 0.15 0.15 0.12 0.07
## item_60 0.04 0.09 0.16 0.22 0.19 0.12 0.06
## item_61 0.04 0.08 0.11 0.13 0.12 0.09 0.05
## item_63 0.04 0.08 0.14 0.19 0.18 0.12 0.06
## item_64 0.04 0.07 0.11 0.14 0.13 0.10 0.06
## item_65 0.04 0.08 0.14 0.19 0.17 0.11 0.06
## item_66 0.03 0.07 0.17 0.28 0.27 0.15 0.06
## item_70 0.04 0.07 0.10 0.12 0.11 0.09 0.06
## item_71 0.04 0.06 0.08 0.09 0.09 0.08 0.05
## Test Info 11.79 17.99 17.49 12.21 6.29 2.86 1.28
## SEM 0.29 0.24 0.24 0.29 0.40 0.59 0.88
## Reliability 0.92 0.94 0.94 0.92 0.84 0.65 0.22
##
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
##
## Test of the hypothesis that 1 factor is sufficient.
## The degrees of freedom for the model is 2700 and the objective function was 263
## The number of observations was 1409 with Chi Square = 363017 with prob < 0
##
## The root mean square of the residuals (RMSA) is 0.1
## The df corrected root mean square of the residuals is 0.1
##
## Tucker Lewis Index of factoring reliability = 0.062
## RMSEA index = 0.31 and the 10 % confidence intervals are 0.31 NA
## BIC = 343440
#item params
item_params = data_frame(
item = names(cog_items) %>% str_replace("item_", ""),
word = fertility_data$Word,
pass_rate = cog_items %>% colMeans(),
difficulty = irt_fa$irt$difficulty[[1]],
loading = irt_fa$fa$loadings[, 1],
discrimination = irt_fa$irt$discrimination %>% as.vector,
item_fertility = item_fertility,
item_fertility_se = item_fertility_se,
pass_rate_from_50 = abs(pass_rate - .5)
)
#print
print(item_params %>% dplyr::select(-word), n=Inf)
## # A tibble: 75 x 8
## item pass_rate difficulty loading discrimination item_fertility
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.994 -2.68 0.328 0.348 0.0365
## 2 2 0.991 -2.78 0.532 0.629 -0.0148
## 3 3 0.982 -2.14 0.211 0.216 0.0400
## 4 4 0.993 -2.82 0.496 0.572 0.00583
## 5 5 0.992 -2.93 0.563 0.681 -0.00389
## 6 6 0.992 -2.80 0.502 0.581 0.00946
## 7 7 0.996 -2.99 0.478 0.544 -0.0135
## 8 8 0.980 -3.07 0.743 1.11 -0.0659
## 9 9 0.981 -3.15 0.753 1.14 -0.0910
## 10 10 0.983 -2.71 0.624 0.799 -0.0318
## 11 11 0.978 -2.64 0.647 0.849 -0.0357
## 12 12 0.899 -1.72 0.671 0.906 -0.0770
## 13 13 0.984 -2.69 0.600 0.749 -0.0766
## 14 14 0.972 -2.67 0.701 0.982 -0.0589
## 15 15 0.948 -2.10 0.630 0.811 -0.0514
## 16 16 0.912 -1.94 0.717 1.03 -0.0781
## 17 17 0.935 -2.24 0.738 1.09 -0.0745
## 18 18 0.956 -2.12 0.591 0.733 -0.00238
## 19 19 0.906 -1.69 0.627 0.804 -0.0129
## 20 20 0.893 -1.73 0.698 0.975 -0.0625
## 21 21 0.962 -2.59 0.728 1.06 -0.0681
## 22 22 0.739 -0.893 0.697 0.973 -0.0352
## 23 23 0.933 -1.84 0.576 0.705 -0.0145
## 24 24 0.872 -1.66 0.728 1.06 -0.0739
## 25 25 0.840 -1.21 0.574 0.701 -0.0994
## 26 26 0.871 -1.82 0.784 1.26 -0.103
## 27 27 0.891 -1.68 0.683 0.935 -0.0812
## 28 28 0.874 -1.59 0.691 0.957 -0.0474
## 29 29 0.877 -1.48 0.622 0.794 -0.0348
## 30 30 0.900 -1.89 0.734 1.08 -0.0465
## 31 31 0.776 -0.953 0.606 0.762 -0.0375
## 32 32 0.725 -0.781 0.642 0.836 -0.0490
## 33 33 0.618 -0.349 0.506 0.587 -0.0703
## 34 34 0.811 -1.07 0.573 0.700 -0.0498
## 35 35 0.304 0.581 0.473 0.537 -0.0565
## 36 36 0.764 -0.909 0.614 0.777 -0.0618
## 37 37 0.737 -0.885 0.696 0.969 -0.112
## 38 38 0.586 -0.272 0.599 0.748 -0.0392
## 39 39 0.774 -0.964 0.624 0.798 -0.0512
## 40 40 0.762 -0.985 0.689 0.952 -0.0822
## 41 41 0.598 -0.372 0.747 1.12 -0.0494
## 42 42 0.476 0.0616 0.250 0.258 0.0124
## 43 43 0.491 0.0272 0.574 0.701 -0.0278
## 44 44 0.603 -0.385 0.738 1.09 -0.0664
## 45 45 0.732 -0.882 0.713 1.02 -0.0523
## 46 46 0.608 -0.298 0.389 0.423 -0.0204
## 47 47 0.409 0.249 0.374 0.404 0.0191
## 48 48 0.400 0.301 0.543 0.647 -0.0384
## 49 49 0.519 -0.0486 0.240 0.247 0.0112
## 50 50 0.412 0.308 0.695 0.967 -0.0548
## 51 51 0.462 0.0972 0.197 0.201 -0.0210
## 52 52 0.289 0.570 0.215 0.220 -0.0368
## 53 53 0.599 -0.304 0.566 0.687 -0.0388
## 54 54 0.348 0.412 0.315 0.332 -0.0183
## 55 55 0.435 0.206 0.610 0.769 -0.103
## 56 56 0.336 0.434 0.209 0.214 -0.0310
## 57 57 0.312 0.537 0.413 0.453 -0.0212
## 58 58 0.289 0.606 0.394 0.428 -0.0368
## 59 59 0.275 0.658 0.423 0.467 -0.00967
## 60 60 0.429 0.203 0.485 0.554 -0.0260
## 61 61 0.456 0.121 0.392 0.426 -0.0254
## 62 62 0.309 0.520 0.277 0.288 -0.0302
## 63 63 0.390 0.313 0.459 0.516 0.0110
## 64 64 0.356 0.404 0.401 0.437 -0.0666
## 65 65 0.395 0.298 0.456 0.513 -0.0600
## 66 66 0.360 0.427 0.541 0.644 -0.0389
## 67 67 0.147 1.07 0.200 0.204 -0.0129
## 68 68 0.203 0.846 0.189 0.193 -0.0304
## 69 69 0.253 0.675 0.183 0.186 -0.0152
## 70 70 0.365 0.373 0.377 0.407 -0.0267
## 71 71 0.332 0.461 0.339 0.360 -0.0699
## 72 72 0.202 0.850 0.181 0.184 0.0283
## 73 73 0.290 0.565 0.192 0.195 -0.0440
## 74 74 0.203 0.843 0.171 0.173 -0.00854
## 75 75 0.262 0.648 0.178 0.181 -0.0244
## # ... with 2 more variables: item_fertility_se <dbl>,
## # pass_rate_from_50 <dbl>
#cors
cor_matrix(item_params[-c(1:2)], CI = .95) %>% write_clipboard()
## X
## 1 1
## 2 -0.97 [-0.98 -0.95]
## 3 0.68 [0.54 0.79]
## 4 0.67 [0.52 0.78]
## 5 -0.19 [-0.40 0.04]
## 6 -0.27 [-0.47 -0.05]
## 7 0.75 [0.63 0.83]
## 8 -0.97 [-0.98 -0.95]
## 9 1
## 10 -0.61 [-0.73 -0.44]
## 11 -0.60 [-0.73 -0.43]
## 12 0.13 [-0.10 0.35]
## 13 0.22 [0.00 0.43]
## 14 -0.83 [-0.89 -0.74]
## 15 0.68 [0.54 0.79]
## 16 -0.61 [-0.73 -0.44]
## 17 1
## 18 0.99 [0.98 0.99]
## 19 -0.61 [-0.74 -0.45]
## 20 -0.52 [-0.67 -0.34]
## 21 0.33 [0.11 0.52]
## 22 0.67 [0.52 0.78]
## 23 -0.60 [-0.73 -0.43]
## 24 0.99 [0.98 0.99]
## 25 1
## 26 -0.64 [-0.76 -0.48]
## 27 -0.56 [-0.70 -0.38]
## 28 0.34 [0.13 0.53]
## 29 -0.19 [-0.40 0.04]
## 30 0.13 [-0.10 0.35]
## 31 -0.61 [-0.74 -0.45]
## 32 -0.64 [-0.76 -0.48]
## 33 1
## 34 0.86 [0.79 0.91]
## 35 -0.02 [-0.25 0.20]
## 36 -0.27 [-0.47 -0.05]
## 37 0.22 [0.00 0.43]
## 38 -0.52 [-0.67 -0.34]
## 39 -0.56 [-0.70 -0.38]
## 40 0.86 [0.79 0.91]
## 41 1
## 42 -0.12 [-0.34 0.11]
## 43 0.75 [0.63 0.83]
## 44 -0.83 [-0.89 -0.74]
## 45 0.33 [0.11 0.52]
## 46 0.34 [0.13 0.53]
## 47 -0.02 [-0.25 0.20]
## 48 -0.12 [-0.34 0.11]
## 49 1
wtd.cors(item_params[-c(1:2)]) %>% .[5, 1:5]
## pass_rate difficulty loading discrimination item_fertility
## -0.19 0.13 -0.61 -0.64 1.00
cor(item_params[-c(1:2)], use = "pairwise.complete.obs") %>% .[5, 1:5]
## pass_rate difficulty loading discrimination item_fertility
## -0.19 0.13 -0.61 -0.64 1.00
cor(item_params[-c(1:2)], use = "complete.obs") %>% .[5, 1:5]
## pass_rate difficulty loading discrimination item_fertility
## -0.19 0.13 -0.61 -0.64 1.00
#corr.test
corr.test(data.frame(item_params$loading, item_params$item_fertility)) %>% print(short=F)
## Call:corr.test(x = data.frame(item_params$loading, item_params$item_fertility))
## Correlation matrix
## item_params.loading item_params.item_fertility
## item_params.loading 1.00 -0.61
## item_params.item_fertility -0.61 1.00
## Sample Size
## [1] 75
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## item_params.loading item_params.item_fertility
## item_params.loading 0 0
## item_params.item_fertility 0 0
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## itm_.-it_._ -0.74 -0.61 -0.45 0 -0.74 -0.45
#score persons
irt_fa_score = scoreIrt(irt_fa, cog_items)
#save and standardize
d$theta = irt_fa_score$theta1 %>% standardize()
#plot
GG_denhist(d, "theta") + theme_large
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

describe(d$theta) %>% as.data.frame()
GG_save("figs/theta_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_scatter(item_params, "loading", "item_fertility", case_names = "item") +
xlab("Item loading") + ylab("Item-fertility latent correlation") +
theme_large

GG_save("figs/loading_fert.png")
GG_scatter(item_params, "difficulty", "item_fertility", case_names = "item") +
xlab("Item difficulty") + ylab("Item-fertility latent correlation") +
theme_large

GG_save("figs/difficulty_fert.png")
#model
lm(item_fertility ~ loading + difficulty, data = item_params) %>% MOD_summary(kfold = F)
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## loading -0.84 0.11 -1.1 -0.63
## difficulty -0.38 0.11 -0.6 -0.17
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 item_fertility 75 72 0.47 0.45 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## loading 0.67 0.68
## difficulty 0.30 0.38
Person-level analysis
#estimate relationship between g and fertility using polr
(polr_fit = MASS::polr(offspring_ordinal ~ theta, data = d) %>% summary())
##
## Re-fitting to get Hessian
## Call:
## MASS::polr(formula = offspring_ordinal ~ theta, data = d)
##
## Coefficients:
## Value Std. Error t value
## theta -0.189 0.0489 -3.86
##
## Intercepts:
## Value Std. Error t value
## 0|1 -1.072 0.061 -17.501
## 1|2 -0.343 0.054 -6.320
## 2|3 1.539 0.070 22.026
## 3|4 2.799 0.114 24.544
## 4|5 4.162 0.215 19.354
## 5|6 5.880 0.501 11.742
##
## Residual Deviance: 4122.66
## AIC: 4136.66
#latent correlation
(latent_cor_fit = polycor::hetcor(d[c("offspring_ordinal", "theta")] %>% as.data.frame()))
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## offspring_ordinal theta
## offspring_ordinal 1 Polyserial
## theta -0.114 1
##
## Standard Errors:
## offspring_ordinal theta
## 0.0276
## Levels: 0.0276
##
## n = 1409
##
## P-values for Tests of Bivariate Normality:
## offspring_ordinal theta
## 2.16e-17
## Levels: 2.16e-17
#plot to look for nonlinear effects
GG_scatter(d, "theta", "offspring") +
geom_smooth() +
theme_large
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#distributions
GG_denhist(d, "theta") + theme_large
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d, "offspring") + theme_large
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Other IRT analyses
#extra IRT models
#using the ltm library
irt_fa_3pl = ltm::tpm(cog_items)
irt_fa_2pl = ltm::tpm(cog_items, constraint = matrix(c(1:75, rep(1, 75), rep(0, 75)), ncol = 3))
#output params
irt_fa_3pl
##
## Call:
## ltm::tpm(data = cog_items)
##
## Coefficients:
## Gussng Dffclt Dscrmn
## item_1 0.003 -6.674 0.84
## item_2 0.001 -4.050 1.46
## item_3 0.058 -9.951 0.40
## item_4 0.000 -4.210 1.49
## item_5 0.000 -3.892 1.66
## item_6 0.000 -4.204 1.45
## item_7 0.000 -4.554 1.51
## item_8 0.000 -2.594 2.95
## item_9 0.277 -2.387 3.17
## item_10 0.000 -3.351 1.64
## item_11 0.000 -3.162 1.64
## item_12 0.000 -2.007 1.49
## item_13 0.000 -3.518 1.55
## item_14 0.292 -2.300 2.42
## item_15 0.354 -2.034 1.79
## item_16 0.266 -1.540 2.24
## item_17 0.318 -1.581 3.02
## item_18 0.000 -2.955 1.33
## item_19 0.000 -2.245 1.30
## item_20 0.126 -1.656 1.81
## item_21 0.000 -2.519 1.98
## item_22 0.000 -0.961 1.53
## item_23 0.491 -1.620 1.75
## item_24 0.167 -1.333 2.21
## item_25 0.000 -1.918 1.05
## item_26 0.111 -1.331 2.47
## item_27 0.076 -1.693 1.80
## item_28 0.254 -1.313 1.94
## item_29 0.000 -1.954 1.31
## item_30 0.218 -1.457 2.37
## item_31 0.183 -0.923 1.46
## item_32 0.223 -0.490 1.92
## item_33 0.316 0.212 2.27
## item_34 0.256 -1.001 1.51
## item_35 0.104 0.957 2.28
## item_36 0.329 -0.478 2.08
## item_37 0.056 -0.843 1.66
## item_38 0.132 -0.068 1.65
## item_39 0.000 -1.288 1.24
## item_40 0.220 -0.653 2.10
## item_41 0.053 -0.218 2.17
## item_42 0.341 1.181 1.64
## item_43 0.103 0.265 1.56
## item_44 0.098 -0.157 2.43
## item_45 0.193 -0.528 2.31
## item_46 0.417 0.582 2.22
## item_47 0.141 1.004 0.92
## item_48 0.173 0.716 2.93
## item_49 0.446 1.198 3.85
## item_50 0.052 0.397 2.62
## item_51 0.379 1.312 2.54
## item_52 0.230 1.426 5.12
## item_53 0.209 0.048 2.00
## item_54 0.211 1.071 3.08
## item_55 0.117 0.460 2.57
## item_56 0.247 1.276 3.63
## item_57 0.151 1.116 2.17
## item_58 0.090 1.100 1.70
## item_59 0.112 1.066 2.69
## item_60 0.224 0.769 2.65
## item_61 0.260 0.849 1.93
## item_62 0.198 1.238 2.95
## item_63 0.200 0.853 2.74
## item_64 0.192 1.026 2.39
## item_65 0.132 0.856 1.26
## item_66 0.093 0.777 1.68
## item_67 0.091 1.757 2.66
## item_68 0.127 1.792 1.92
## item_69 0.184 1.420 4.05
## item_70 0.238 1.156 2.55
## item_71 0.197 1.074 3.32
## item_72 0.146 1.691 2.86
## item_73 0.197 1.623 1.81
## item_74 0.144 2.421 1.33
## item_75 0.205 1.761 2.34
##
## Log.Lik: -41383
irt_fa_2pl
##
## Call:
## ltm::tpm(data = cog_items, constraint = matrix(c(1:75, rep(1,
## 75), rep(0, 75)), ncol = 3))
##
## Coefficients:
## Gussng Dffclt Dscrmn
## item_1 0 -8.148 0.65
## item_2 0 -2.573 2.44
## item_3 0 -10.200 0.39
## item_4 0 -3.137 1.90
## item_5 0 -2.678 2.39
## item_6 0 -2.830 2.17
## item_7 0 -3.022 2.29
## item_8 0 -1.815 4.23
## item_9 0 -1.832 4.19
## item_10 0 -2.337 2.36
## item_11 0 -2.113 2.60
## item_12 0 -1.504 1.96
## item_13 0 -2.456 2.23
## item_14 0 -1.873 3.03
## item_15 0 -1.827 2.22
## item_16 0 -1.437 2.48
## item_17 0 -1.472 3.14
## item_18 0 -2.229 1.71
## item_19 0 -1.704 1.67
## item_20 0 -1.391 2.18
## item_21 0 -1.758 2.99
## item_22 0 -0.755 1.80
## item_23 0 -1.852 1.84
## item_24 0 -1.195 2.52
## item_25 0 -1.587 1.18
## item_26 0 -1.109 3.07
## item_27 0 -1.394 2.13
## item_28 0 -1.294 2.14
## item_29 0 -1.463 1.72
## item_30 0 -1.318 2.68
## item_31 0 -1.000 1.52
## item_32 0 -0.715 1.75
## item_33 0 -0.385 1.28
## item_34 0 -1.172 1.52
## item_35 0 0.971 1.22
## item_36 0 -0.894 1.67
## item_37 0 -0.742 1.84
## item_38 0 -0.234 1.43
## item_39 0 -1.008 1.48
## item_40 0 -0.818 1.95
## item_41 0 -0.225 2.20
## item_42 0 0.303 0.56
## item_43 0 0.132 1.33
## item_44 0 -0.240 2.21
## item_45 0 -0.681 2.08
## item_46 0 -0.470 0.87
## item_47 0 0.754 0.63
## item_48 0 0.504 1.31
## item_49 0 -0.035 0.50
## item_50 0 0.358 2.12
## item_51 0 0.480 0.45
## item_52 0 1.910 0.54
## item_53 0 -0.285 1.42
## item_54 0 0.961 0.85
## item_55 0 0.318 1.66
## item_56 0 1.340 0.60
## item_57 0 1.112 0.93
## item_58 0 1.169 1.02
## item_59 0 1.161 1.15
## item_60 0 0.426 1.09
## item_61 0 0.356 0.84
## item_62 0 1.349 0.73
## item_63 0 0.608 1.09
## item_64 0 0.868 0.92
## item_65 0 0.670 0.88
## item_66 0 0.710 1.21
## item_67 0 2.933 0.68
## item_68 0 2.600 0.59
## item_69 0 2.141 0.57
## item_70 0 0.929 0.77
## item_71 0 1.028 0.89
## item_72 0 3.016 0.50
## item_73 0 2.035 0.49
## item_74 0 4.008 0.36
## item_75 0 2.800 0.40
##
## Log.Lik: -41827
#add to item params
item_params$discrimination_3pl = irt_fa_3pl$coefficients[, 3]
item_params$discrimination_2pl = irt_fa_2pl$coefficients[, 3]
item_params$difficulty_2pl = irt_fa_2pl$coefficients[, 2] * -1
item_params$difficulty_3pl = irt_fa_3pl$coefficients[, 2]
#cors
cor_matrix(item_params[-c(1:2)], CI = .95) %>% write_clipboard()
## X
## 1 1
## 2 -0.97 [-0.98 -0.95]
## 3 0.68 [0.54 0.79]
## 4 0.67 [0.52 0.78]
## 5 -0.19 [-0.40 0.04]
## 6 -0.27 [-0.47 -0.05]
## 7 0.75 [0.63 0.83]
## 8 -0.44 [-0.61 -0.24]
## 9 0.74 [0.62 0.83]
## 10 -0.93 [-0.96 -0.90]
## 11 0.95 [0.92 0.97]
## 12 -0.97 [-0.98 -0.95]
## 13 1
## 14 -0.61 [-0.73 -0.44]
## 15 -0.60 [-0.73 -0.43]
## 16 0.13 [-0.10 0.35]
## 17 0.22 [0.00 0.43]
## 18 -0.83 [-0.89 -0.74]
## 19 0.37 [0.16 0.55]
## 20 -0.77 [-0.85 -0.66]
## 21 0.99 [0.99 0.99]
## 22 -0.97 [-0.98 -0.95]
## 23 0.68 [0.54 0.79]
## 24 -0.61 [-0.73 -0.44]
## 25 1
## 26 0.99 [0.98 0.99]
## 27 -0.61 [-0.74 -0.45]
## 28 -0.52 [-0.67 -0.34]
## 29 0.33 [0.11 0.52]
## 30 -0.23 [-0.43 0.00]
## 31 0.86 [0.78 0.91]
## 32 -0.56 [-0.70 -0.38]
## 33 0.69 [0.55 0.79]
## 34 0.67 [0.52 0.78]
## 35 -0.60 [-0.73 -0.43]
## 36 0.99 [0.98 0.99]
## 37 1
## 38 -0.64 [-0.76 -0.48]
## 39 -0.56 [-0.70 -0.38]
## 40 0.34 [0.13 0.53]
## 41 -0.16 [-0.37 0.07]
## 42 0.88 [0.82 0.92]
## 43 -0.55 [-0.69 -0.37]
## 44 0.67 [0.53 0.78]
## 45 -0.19 [-0.40 0.04]
## 46 0.13 [-0.10 0.35]
## 47 -0.61 [-0.74 -0.45]
## 48 -0.64 [-0.76 -0.48]
## 49 1
## 50 0.86 [0.79 0.91]
## 51 -0.02 [-0.25 0.20]
## 52 -0.13 [-0.35 0.10]
## 53 -0.51 [-0.66 -0.32]
## 54 0.10 [-0.13 0.32]
## 55 -0.18 [-0.39 0.05]
## 56 -0.27 [-0.47 -0.05]
## 57 0.22 [0.00 0.43]
## 58 -0.52 [-0.67 -0.34]
## 59 -0.56 [-0.70 -0.38]
## 60 0.86 [0.79 0.91]
## 61 1
## 62 -0.12 [-0.34 0.11]
## 63 -0.02 [-0.25 0.21]
## 64 -0.45 [-0.61 -0.25]
## 65 0.19 [-0.03 0.40]
## 66 -0.27 [-0.46 -0.04]
## 67 0.75 [0.63 0.83]
## 68 -0.83 [-0.89 -0.74]
## 69 0.33 [0.11 0.52]
## 70 0.34 [0.13 0.53]
## 71 -0.02 [-0.25 0.20]
## 72 -0.12 [-0.34 0.11]
## 73 1
## 74 -0.31 [-0.50 -0.09]
## 75 0.58 [0.41 0.71]
## 76 -0.83 [-0.89 -0.75]
## 77 0.78 [0.67 0.85]
## 78 -0.44 [-0.61 -0.24]
## 79 0.37 [0.16 0.55]
## 80 -0.23 [-0.43 0.00]
## 81 -0.16 [-0.37 0.07]
## 82 -0.13 [-0.35 0.10]
## 83 -0.02 [-0.25 0.21]
## 84 -0.31 [-0.50 -0.09]
## 85 1
## 86 -0.05 [-0.28 0.18]
## 87 0.32 [0.10 0.51]
## 88 -0.51 [-0.66 -0.32]
## 89 0.74 [0.62 0.83]
## 90 -0.77 [-0.85 -0.66]
## 91 0.86 [0.78 0.91]
## 92 0.88 [0.82 0.92]
## 93 -0.51 [-0.66 -0.32]
## 94 -0.45 [-0.61 -0.25]
## 95 0.58 [0.41 0.71]
## 96 -0.05 [-0.28 0.18]
## 97 1
## 98 -0.78 [-0.86 -0.68]
## 99 0.81 [0.71 0.87]
## 100 -0.93 [-0.96 -0.90]
## 101 0.99 [0.99 0.99]
## 102 -0.56 [-0.70 -0.38]
## 103 -0.55 [-0.69 -0.37]
## 104 0.10 [-0.13 0.32]
## 105 0.19 [-0.03 0.40]
## 106 -0.83 [-0.89 -0.75]
## 107 0.32 [0.10 0.51]
## 108 -0.78 [-0.86 -0.68]
## 109 1
## 110 -0.96 [-0.97 -0.93]
## 111 0.95 [0.92 0.97]
## 112 -0.97 [-0.98 -0.95]
## 113 0.69 [0.55 0.79]
## 114 0.67 [0.53 0.78]
## 115 -0.18 [-0.39 0.05]
## 116 -0.27 [-0.46 -0.04]
## 117 0.78 [0.67 0.85]
## 118 -0.51 [-0.66 -0.32]
## 119 0.81 [0.71 0.87]
## 120 -0.96 [-0.97 -0.93]
## 121 1
#2PL vs. 3PN discrimination
GG_scatter(item_params, "discrimination", "discrimination_3pl") + theme_large

#nonsense finding!
GG_scatter(item_params, "discrimination", "discrimination_2pl") + theme_large

#fairly close
GG_scatter(item_params, "difficulty", "difficulty_2pl") + theme_large

#main finding with new discrims
GG_scatter(item_params, "discrimination_3pl", "item_fertility") + theme_large

GG_scatter(item_params, "discrimination_2pl", "item_fertility") + theme_large

#model
lm(item_fertility ~ discrimination_2pl + difficulty_2pl, data = item_params) %>% MOD_summary(kfold = F)
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## discrimination_2pl -1.11 0.14 -1.4 -0.84
## difficulty_2pl -0.76 0.14 -1.0 -0.49
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 item_fertility 75 72 0.49 0.47 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## discrimination_2pl 0.69 0.69
## difficulty_2pl 0.48 0.55
##try without items with extreme pass rates (<.05 or >.95
#subset item data
nonextreme_items = is_between(item_params$pass_rate, .05, .95) %>% which
#make list for sub-analysis
no_extreme = list(
item_count = nonextreme_items %>% length,
#items
cog_items = cog_items[nonextreme_items]
)
#irt
no_extreme[["irt_fa"]] = irt.fa(no_extreme$cog_items)

no_extreme[["irt_fa_2f"]] = irt.fa(no_extreme$cog_items, nfactors = 2, plot = F)
## Loading required namespace: GPArotation
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.
no_extreme[["irt_fa_3f"]] = irt.fa(no_extreme$cog_items, nfactors = 3, plot = F)
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.
no_extreme[["irt_fa_3pl"]] = ltm::tpm(no_extreme$cog_items)
no_extreme[["irt_fa_2pl"]] = ltm::tpm(no_extreme$cog_items, constraint = matrix(c(1:no_extreme$item_count, rep(1, no_extreme$item_count), rep(0, no_extreme$item_count)), ncol = 3))
#model outputs
no_extreme[["irt_fa"]]
## Item Response Analysis using Factor Analysis
##
## Call: irt.fa(x = no_extreme$cog_items)
## Item Response Analysis using Factor Analysis
##
## Summary information by factor and item
## Factor = 1
## -3 -2 -1 0 1 2 3
## item_12 0.21 0.43 0.39 0.16 0.05 0.01 0.00
## item_15 0.31 0.45 0.28 0.10 0.03 0.01 0.00
## item_16 0.29 0.59 0.41 0.13 0.03 0.01 0.00
## item_17 0.49 0.77 0.31 0.06 0.01 0.00 0.00
## item_19 0.18 0.32 0.30 0.16 0.06 0.02 0.01
## item_20 0.25 0.58 0.47 0.15 0.04 0.01 0.00
## item_22 0.08 0.31 0.63 0.40 0.12 0.03 0.01
## item_23 0.23 0.36 0.28 0.13 0.04 0.01 0.00
## item_24 0.24 0.72 0.58 0.15 0.03 0.00 0.00
## item_25 0.12 0.24 0.28 0.20 0.09 0.04 0.01
## item_26 0.29 0.98 0.60 0.11 0.01 0.00 0.00
## item_27 0.23 0.53 0.46 0.16 0.04 0.01 0.00
## item_28 0.20 0.51 0.49 0.19 0.05 0.01 0.00
## item_29 0.17 0.34 0.36 0.19 0.07 0.02 0.01
## item_30 0.33 0.79 0.47 0.11 0.02 0.00 0.00
## item_31 0.10 0.29 0.45 0.31 0.11 0.03 0.01
## item_32 0.08 0.28 0.59 0.42 0.13 0.03 0.01
## item_33 0.06 0.15 0.29 0.31 0.19 0.08 0.03
## item_34 0.12 0.30 0.41 0.26 0.10 0.03 0.01
## item_35 0.02 0.06 0.15 0.27 0.29 0.18 0.08
## item_36 0.10 0.29 0.48 0.33 0.12 0.03 0.01
## item_37 0.08 0.32 0.68 0.42 0.11 0.02 0.00
## item_38 0.04 0.15 0.39 0.47 0.24 0.07 0.02
## item_39 0.10 0.28 0.44 0.30 0.11 0.03 0.01
## item_40 0.09 0.36 0.66 0.37 0.10 0.02 0.00
## item_41 0.02 0.14 0.75 0.94 0.21 0.03 0.00
## item_43 0.03 0.10 0.28 0.44 0.30 0.11 0.03
## item_44 0.02 0.15 0.76 0.91 0.21 0.03 0.00
## item_45 0.07 0.34 0.79 0.45 0.10 0.02 0.00
## item_46 0.06 0.10 0.15 0.16 0.12 0.08 0.04
## item_47 0.04 0.07 0.09 0.11 0.10 0.08 0.05
## item_48 0.03 0.08 0.21 0.36 0.32 0.15 0.05
## item_50 0.00 0.04 0.24 0.90 0.67 0.13 0.02
## item_53 0.05 0.15 0.37 0.43 0.22 0.07 0.02
## item_54 0.04 0.07 0.11 0.14 0.13 0.10 0.06
## item_55 0.02 0.07 0.27 0.60 0.43 0.13 0.03
## item_57 0.03 0.07 0.12 0.17 0.17 0.12 0.07
## item_58 0.03 0.06 0.13 0.21 0.22 0.15 0.08
## item_59 0.02 0.06 0.14 0.25 0.28 0.18 0.08
## item_60 0.04 0.09 0.19 0.28 0.24 0.13 0.05
## item_61 0.04 0.08 0.13 0.16 0.15 0.10 0.05
## item_62 0.03 0.05 0.08 0.09 0.09 0.08 0.06
## item_63 0.03 0.08 0.17 0.26 0.24 0.14 0.06
## item_64 0.04 0.07 0.12 0.17 0.16 0.12 0.06
## item_65 0.04 0.08 0.15 0.20 0.19 0.12 0.06
## item_66 0.02 0.07 0.19 0.36 0.34 0.17 0.06
## item_70 0.04 0.07 0.10 0.12 0.12 0.09 0.06
## item_71 0.04 0.07 0.11 0.14 0.14 0.11 0.07
## Test Info 5.18 12.56 16.48 13.69 7.34 3.15 1.30
## SEM 0.44 0.28 0.25 0.27 0.37 0.56 0.88
## Reliability 0.81 0.92 0.94 0.93 0.86 0.68 0.23
##
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
##
## Test of the hypothesis that 1 factor is sufficient.
## The degrees of freedom for the model is 1710 and the objective function was 19
## The number of observations was 1409 with Chi Square = 26881 with prob < 0
##
## The root mean square of the residuals (RMSA) is 0.08
## The df corrected root mean square of the residuals is 0.08
##
## Tucker Lewis Index of factoring reliability = 0.51
## RMSEA index = 0.1 and the 10 % confidence intervals are 0.1 0.1
## BIC = 14482
no_extreme[["irt_fa_2f"]]
## Item Response Analysis using Factor Analysis
##
## Call: irt.fa(x = no_extreme$cog_items, nfactors = 2, plot = F)
##
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
##
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 1651 and the objective function was 15
## The number of observations was 1409 with Chi Square = 21261 with prob < 0
##
## The root mean square of the residuals (RMSA) is 0.05
## The df corrected root mean square of the residuals is 0.05
##
## Tucker Lewis Index of factoring reliability = 0.6
## RMSEA index = 0.093 and the 10 % confidence intervals are 0.091 0.093
## BIC = 9290
## With factor correlations of
## MR1 MR2
## MR1 1.00 0.48
## MR2 0.48 1.00
no_extreme[["irt_fa_3f"]]
## Item Response Analysis using Factor Analysis
##
## Call: irt.fa(x = no_extreme$cog_items, nfactors = 3, plot = F)
##
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate,
## fm = fm)
##
## Test of the hypothesis that 3 factors are sufficient.
## The degrees of freedom for the model is 1593 and the objective function was 14
## The number of observations was 1409 with Chi Square = 19941 with prob < 0
##
## The root mean square of the residuals (RMSA) is 0.04
## The df corrected root mean square of the residuals is 0.04
##
## Tucker Lewis Index of factoring reliability = 0.61
## RMSEA index = 0.091 and the 10 % confidence intervals are 0.089 0.092
## BIC = 8391
## With factor correlations of
## MR1 MR2 MR3
## MR1 1.00 0.42 0.45
## MR2 0.42 1.00 0.45
## MR3 0.45 0.45 1.00
#item params
no_extreme$item_params = data_frame(
item = names(no_extreme$cog_items),
word = fertility_data$Word[nonextreme_items],
pass_rate = no_extreme$cog_items %>% colMeans(),
difficulty = no_extreme$irt_fa$irt$difficulty[[1]],
loading = no_extreme$irt_fa$fa$loadings %>% as.vector(),
discrimination = no_extreme$irt_fa$irt$discrimination %>% as.vector,
discrimination_2pl = no_extreme$irt_fa_2pl$coefficients[, 3],
discrimination_3pl = no_extreme$irt_fa_3pl$coefficients[, 3]
)
#item x fertility relations
no_extreme$item_params$item_fertility = map_dbl(no_extreme$item_params$item, function(x) {
#extract and calculate and extract
d[c(x, "offspring")] %>%
#as normal data frame
as.data.frame %>%
#correlate
polycor::hetcor() %>%
#extract r
.[[1]] %>% .[1, 2]
})
#standard errors
no_extreme$item_params$item_fertility_se = map_dbl(no_extreme$item_params$item, function(x) {
#extract and calculate and extract
d[c(x, "offspring")] %>%
#as normal data frame
as.data.frame() %>%
#correlate
polycor::hetcor() %>%
#extract r
.[["std.errors"]] %>% .[1, 2]
})
#cors
cor_matrix(no_extreme$item_params[-c(1:2)], CI = .95)
## pass_rate difficulty
## pass_rate "1" "-0.99 [-0.99 -0.98]"
## difficulty "-0.99 [-0.99 -0.98]" "1"
## loading "0.72 [0.57 0.82]" "-0.69 [-0.81 -0.54]"
## discrimination "0.70 [0.54 0.81]" "-0.68 [-0.79 -0.51]"
## discrimination_2pl "0.80 [0.69 0.88]" "-0.83 [-0.90 -0.73]"
## discrimination_3pl "-0.42 [-0.61 -0.19]" "0.38 [0.14 0.58]"
## item_fertility "-0.49 [-0.66 -0.27]" "0.49 [0.27 0.66]"
## item_fertility_se "-0.44 [-0.62 -0.21]" "0.44 [0.21 0.62]"
## loading discrimination
## pass_rate "0.72 [0.57 0.82]" "0.70 [0.54 0.81]"
## difficulty "-0.69 [-0.81 -0.54]" "-0.68 [-0.79 -0.51]"
## loading "1" "0.98 [0.97 0.99]"
## discrimination "0.98 [0.97 0.99]" "1"
## discrimination_2pl "0.90 [0.84 0.94]" "0.92 [0.87 0.95]"
## discrimination_3pl "-0.35 [-0.56 -0.11]" "-0.30 [-0.51 -0.05]"
## item_fertility "-0.63 [-0.76 -0.45]" "-0.63 [-0.76 -0.45]"
## item_fertility_se "-0.53 [-0.69 -0.31]" "-0.54 [-0.70 -0.33]"
## discrimination_2pl discrimination_3pl
## pass_rate "0.80 [0.69 0.88]" "-0.42 [-0.61 -0.19]"
## difficulty "-0.83 [-0.90 -0.73]" "0.38 [0.14 0.58]"
## loading "0.90 [0.84 0.94]" "-0.35 [-0.56 -0.11]"
## discrimination "0.92 [0.87 0.95]" "-0.30 [-0.51 -0.05]"
## discrimination_2pl "1" "-0.24 [-0.46 0.02]"
## discrimination_3pl "-0.24 [-0.46 0.02]" "1"
## item_fertility "-0.61 [-0.75 -0.43]" "0.16 [-0.09 0.40]"
## item_fertility_se "-0.53 [-0.69 -0.33]" "0.15 [-0.11 0.39]"
## item_fertility item_fertility_se
## pass_rate "-0.49 [-0.66 -0.27]" "-0.44 [-0.62 -0.21]"
## difficulty "0.49 [0.27 0.66]" "0.44 [0.21 0.62]"
## loading "-0.63 [-0.76 -0.45]" "-0.53 [-0.69 -0.31]"
## discrimination "-0.63 [-0.76 -0.45]" "-0.54 [-0.70 -0.33]"
## discrimination_2pl "-0.61 [-0.75 -0.43]" "-0.53 [-0.69 -0.33]"
## discrimination_3pl "0.16 [-0.09 0.40]" "0.15 [-0.11 0.39]"
## item_fertility "1" "0.90 [0.84 0.94]"
## item_fertility_se "0.90 [0.84 0.94]" "1"
#plots
GG_scatter(no_extreme$item_params, "loading", "item_fertility", case_names = "item") + theme_large

GG_save("figs/ne_loading_fert.png")
GG_scatter(no_extreme$item_params, "difficulty", "item_fertility", case_names = "item") + theme_large

GG_save("figs/ne_difficulty_fert.png")
#model
lm(item_fertility ~ loading + difficulty, data = no_extreme$item_params) %>% MOD_summary(kfold = F)
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## loading -0.561 0.14 -0.85 -0.28
## difficulty 0.097 0.14 -0.19 0.38
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 item_fertility 60 57 0.4 0.38 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## loading 0.40 0.46
## difficulty 0.07 0.09
Output data
Output summary stats for reuse.
#main
write_csv(item_params, "data/item_params.csv", na = "")
write_rds(item_params, "data/item_params.rds")
#no extreme items
write_csv(no_extreme$item_params, "data/item_params_no_extreme.csv", na = "")
write_rds(no_extreme$item_params, "data/item_params_no_extreme.rds")