Init

options(digits = 2)
library(pacman)
p_load(kirkegaard, readr, ltm, rms)

#theme design
theme_large = theme(text = element_text(size = 15))

Data

d = readxl::read_xlsx("data/WFK04.xlsx") %>% na.omit()

#fix names
item_names = "item_" + 1:75
names(d) = c("item", item_names, "offspring")

#scores
cog_items = d[item_names]

#item data
fertility_data = readxl::read_xlsx("data/Fertility-pass rate.xlsx")

#recoded offspring
d$offspring_ordinal = ordered(d$offspring)
table2(d$offspring)

Main IRT analysis

#item x fertility relations
item_fertility = map_dbl(item_names, function(x) {
  #extract item
  d[c(x, "offspring")] %>%
    #as normal data frame
    as.data.frame %>%
    #correlate
    polycor::hetcor() %>%
    #extract r
    .[[1]] %>% .[1, 2]
})

#ordinal model, alternative
item_fertility_ordinal = map_dbl(item_names, function(x) {
  #extract item
  d_ = d[c(x, "offspring")] %>% 
    #as normal data frame
    as.data.frame
  colnames(d_) = c("item", "offspring")
  
  #model
  rms::orm(item ~ offspring, data = d_) %>% coef %>% .[2]
})

#compare methods
data_frame(polychoric = item_fertility, ordinal = item_fertility_ordinal) %>% 
  GG_scatter("polychoric", "ordinal") +
  theme_large

#standard errors
item_fertility_se = map_dbl(item_names, function(x) {
  #extract and calculate and extract
  d[c(x, "offspring")] %>% 
    #as normal data frame
    as.data.frame() %>%
    #correlate
    polycor::hetcor() %>% 
    #extract r
    .[["std.errors"]] %>% .[1, 2]
})

#fit IRT fa
irt_fa = irt.fa(cog_items)

## Warning in cor.smooth(mat): Matrix was not positive definite, smoothing was
## done

## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.

irt_fa

## Item Response Analysis using Factor Analysis 
## 
## Call: irt.fa(x = cog_items)
## Item Response Analysis using Factor Analysis  
## 
##  Summary information by factor and item
##  Factor =  1 
##                -3    -2    -1     0    1    2    3
## item_1       0.09  0.08  0.07  0.05 0.03 0.02 0.01
## item_2       0.28  0.24  0.13  0.05 0.02 0.01 0.00
## item_4       0.24  0.20  0.12  0.05 0.02 0.01 0.00
## item_5       0.34  0.26  0.12  0.04 0.01 0.00 0.00
## item_6       0.24  0.21  0.12  0.05 0.02 0.01 0.00
## item_7       0.21  0.17  0.10  0.05 0.02 0.01 0.00
## item_8       0.89  0.37  0.07  0.01 0.00 0.00 0.00
## item_9       0.93  0.33  0.06  0.01 0.00 0.00 0.00
## item_10      0.45  0.37  0.15  0.04 0.01 0.00 0.00
## item_11      0.49  0.42  0.16  0.04 0.01 0.00 0.00
## item_12      0.25  0.57  0.44  0.15 0.03 0.01 0.00
## item_13      0.39  0.34  0.15  0.05 0.01 0.00 0.00
## item_14      0.65  0.52  0.15  0.03 0.01 0.00 0.00
## item_15      0.33  0.47  0.28  0.09 0.03 0.01 0.00
## item_16      0.36  0.77  0.41  0.10 0.02 0.00 0.00
## item_17      0.55  0.82  0.28  0.05 0.01 0.00 0.00
## item_18      0.29  0.39  0.25  0.10 0.03 0.01 0.00
## item_19      0.23  0.45  0.38  0.15 0.05 0.01 0.00
## item_20      0.27  0.66  0.48  0.14 0.03 0.01 0.00
## item_21      0.71  0.62  0.16  0.03 0.00 0.00 0.00
## item_22      0.08  0.33  0.68  0.41 0.11 0.02 0.00
## item_23      0.23  0.36  0.28  0.13 0.04 0.01 0.00
## item_24      0.24  0.74  0.58  0.15 0.03 0.00 0.00
## item_25      0.13  0.29  0.35  0.22 0.09 0.03 0.01
## item_26      0.31  1.11  0.58  0.09 0.01 0.00 0.00
## item_27      0.25  0.60  0.48  0.15 0.03 0.01 0.00
## item_28      0.22  0.59  0.53  0.17 0.04 0.01 0.00
## item_29      0.18  0.40  0.41  0.19 0.06 0.02 0.00
## item_30      0.34  0.84  0.46  0.10 0.02 0.00 0.00
## item_31      0.10  0.27  0.42  0.29 0.11 0.03 0.01
## item_32      0.08  0.26  0.49  0.38 0.14 0.04 0.01
## item_33      0.06  0.13  0.22  0.24 0.16 0.08 0.03
## item_34      0.12  0.27  0.35  0.24 0.10 0.03 0.01
## item_35      0.03  0.07  0.13  0.19 0.20 0.14 0.07
## item_36      0.10  0.27  0.44  0.31 0.12 0.04 0.01
## item_37      0.08  0.32  0.67  0.42 0.11 0.02 0.00
## item_38      0.05  0.15  0.33  0.39 0.22 0.08 0.02
## item_39      0.10  0.29  0.46  0.31 0.11 0.03 0.01
## item_40      0.09  0.36  0.66  0.37 0.10 0.02 0.00
## item_41      0.02  0.15  0.65  0.81 0.23 0.04 0.01
## item_43      0.04  0.11  0.25  0.36 0.26 0.11 0.04
## item_44      0.03  0.16  0.63  0.76 0.23 0.04 0.01
## item_45      0.07  0.33  0.74  0.44 0.11 0.02 0.00
## item_46      0.06  0.09  0.12  0.13 0.10 0.07 0.04
## item_47      0.04  0.07  0.10  0.12 0.11 0.08 0.05
## item_48      0.03  0.08  0.19  0.30 0.26 0.14 0.06
## item_50      0.01  0.06  0.25  0.64 0.50 0.15 0.03
## item_53      0.05  0.15  0.29  0.33 0.20 0.08 0.03
## item_54      0.04  0.05  0.07  0.08 0.08 0.07 0.05
## item_55      0.03  0.09  0.24  0.42 0.33 0.14 0.04
## item_57      0.03  0.06  0.11  0.14 0.14 0.11 0.07
## item_58      0.03  0.06  0.10  0.13 0.13 0.10 0.07
## item_59      0.03  0.06  0.11  0.15 0.15 0.12 0.07
## item_60      0.04  0.09  0.16  0.22 0.19 0.12 0.06
## item_61      0.04  0.08  0.11  0.13 0.12 0.09 0.05
## item_63      0.04  0.08  0.14  0.19 0.18 0.12 0.06
## item_64      0.04  0.07  0.11  0.14 0.13 0.10 0.06
## item_65      0.04  0.08  0.14  0.19 0.17 0.11 0.06
## item_66      0.03  0.07  0.17  0.28 0.27 0.15 0.06
## item_70      0.04  0.07  0.10  0.12 0.11 0.09 0.06
## item_71      0.04  0.06  0.08  0.09 0.09 0.08 0.05
## Test Info   11.79 17.99 17.49 12.21 6.29 2.86 1.28
## SEM          0.29  0.24  0.24  0.29 0.40 0.59 0.88
## Reliability  0.92  0.94  0.94  0.92 0.84 0.65 0.22
## 
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## 
## Test of the hypothesis that 1 factor is sufficient.
## The degrees of freedom for the model is 2700  and the objective function was  263 
## The number of observations was  1409  with Chi Square =  363017  with prob <  0 
## 
## The root mean square of the residuals (RMSA) is  0.1 
## The df corrected root mean square of the residuals is  0.1 
## 
## Tucker Lewis Index of factoring reliability =  0.062
## RMSEA index =  0.31  and the 10 % confidence intervals are  0.31 NA
## BIC =  343440

#item params
item_params = data_frame(
  item = names(cog_items) %>% str_replace("item_", ""),
  word = fertility_data$Word,
  pass_rate = cog_items %>% colMeans(),
  difficulty = irt_fa$irt$difficulty[[1]],
  loading = irt_fa$fa$loadings[, 1],
  discrimination = irt_fa$irt$discrimination %>% as.vector,
  item_fertility = item_fertility,
  item_fertility_se = item_fertility_se,
  pass_rate_from_50 = abs(pass_rate - .5)
)

#print
print(item_params %>% dplyr::select(-word), n=Inf)

## # A tibble: 75 x 8
##    item  pass_rate difficulty loading discrimination item_fertility
##    <chr>     <dbl>      <dbl>   <dbl>          <dbl>          <dbl>
##  1 1         0.994    -2.68     0.328          0.348        0.0365 
##  2 2         0.991    -2.78     0.532          0.629       -0.0148 
##  3 3         0.982    -2.14     0.211          0.216        0.0400 
##  4 4         0.993    -2.82     0.496          0.572        0.00583
##  5 5         0.992    -2.93     0.563          0.681       -0.00389
##  6 6         0.992    -2.80     0.502          0.581        0.00946
##  7 7         0.996    -2.99     0.478          0.544       -0.0135 
##  8 8         0.980    -3.07     0.743          1.11        -0.0659 
##  9 9         0.981    -3.15     0.753          1.14        -0.0910 
## 10 10        0.983    -2.71     0.624          0.799       -0.0318 
## 11 11        0.978    -2.64     0.647          0.849       -0.0357 
## 12 12        0.899    -1.72     0.671          0.906       -0.0770 
## 13 13        0.984    -2.69     0.600          0.749       -0.0766 
## 14 14        0.972    -2.67     0.701          0.982       -0.0589 
## 15 15        0.948    -2.10     0.630          0.811       -0.0514 
## 16 16        0.912    -1.94     0.717          1.03        -0.0781 
## 17 17        0.935    -2.24     0.738          1.09        -0.0745 
## 18 18        0.956    -2.12     0.591          0.733       -0.00238
## 19 19        0.906    -1.69     0.627          0.804       -0.0129 
## 20 20        0.893    -1.73     0.698          0.975       -0.0625 
## 21 21        0.962    -2.59     0.728          1.06        -0.0681 
## 22 22        0.739    -0.893    0.697          0.973       -0.0352 
## 23 23        0.933    -1.84     0.576          0.705       -0.0145 
## 24 24        0.872    -1.66     0.728          1.06        -0.0739 
## 25 25        0.840    -1.21     0.574          0.701       -0.0994 
## 26 26        0.871    -1.82     0.784          1.26        -0.103  
## 27 27        0.891    -1.68     0.683          0.935       -0.0812 
## 28 28        0.874    -1.59     0.691          0.957       -0.0474 
## 29 29        0.877    -1.48     0.622          0.794       -0.0348 
## 30 30        0.900    -1.89     0.734          1.08        -0.0465 
## 31 31        0.776    -0.953    0.606          0.762       -0.0375 
## 32 32        0.725    -0.781    0.642          0.836       -0.0490 
## 33 33        0.618    -0.349    0.506          0.587       -0.0703 
## 34 34        0.811    -1.07     0.573          0.700       -0.0498 
## 35 35        0.304     0.581    0.473          0.537       -0.0565 
## 36 36        0.764    -0.909    0.614          0.777       -0.0618 
## 37 37        0.737    -0.885    0.696          0.969       -0.112  
## 38 38        0.586    -0.272    0.599          0.748       -0.0392 
## 39 39        0.774    -0.964    0.624          0.798       -0.0512 
## 40 40        0.762    -0.985    0.689          0.952       -0.0822 
## 41 41        0.598    -0.372    0.747          1.12        -0.0494 
## 42 42        0.476     0.0616   0.250          0.258        0.0124 
## 43 43        0.491     0.0272   0.574          0.701       -0.0278 
## 44 44        0.603    -0.385    0.738          1.09        -0.0664 
## 45 45        0.732    -0.882    0.713          1.02        -0.0523 
## 46 46        0.608    -0.298    0.389          0.423       -0.0204 
## 47 47        0.409     0.249    0.374          0.404        0.0191 
## 48 48        0.400     0.301    0.543          0.647       -0.0384 
## 49 49        0.519    -0.0486   0.240          0.247        0.0112 
## 50 50        0.412     0.308    0.695          0.967       -0.0548 
## 51 51        0.462     0.0972   0.197          0.201       -0.0210 
## 52 52        0.289     0.570    0.215          0.220       -0.0368 
## 53 53        0.599    -0.304    0.566          0.687       -0.0388 
## 54 54        0.348     0.412    0.315          0.332       -0.0183 
## 55 55        0.435     0.206    0.610          0.769       -0.103  
## 56 56        0.336     0.434    0.209          0.214       -0.0310 
## 57 57        0.312     0.537    0.413          0.453       -0.0212 
## 58 58        0.289     0.606    0.394          0.428       -0.0368 
## 59 59        0.275     0.658    0.423          0.467       -0.00967
## 60 60        0.429     0.203    0.485          0.554       -0.0260 
## 61 61        0.456     0.121    0.392          0.426       -0.0254 
## 62 62        0.309     0.520    0.277          0.288       -0.0302 
## 63 63        0.390     0.313    0.459          0.516        0.0110 
## 64 64        0.356     0.404    0.401          0.437       -0.0666 
## 65 65        0.395     0.298    0.456          0.513       -0.0600 
## 66 66        0.360     0.427    0.541          0.644       -0.0389 
## 67 67        0.147     1.07     0.200          0.204       -0.0129 
## 68 68        0.203     0.846    0.189          0.193       -0.0304 
## 69 69        0.253     0.675    0.183          0.186       -0.0152 
## 70 70        0.365     0.373    0.377          0.407       -0.0267 
## 71 71        0.332     0.461    0.339          0.360       -0.0699 
## 72 72        0.202     0.850    0.181          0.184        0.0283 
## 73 73        0.290     0.565    0.192          0.195       -0.0440 
## 74 74        0.203     0.843    0.171          0.173       -0.00854
## 75 75        0.262     0.648    0.178          0.181       -0.0244 
## # ... with 2 more variables: item_fertility_se <dbl>,
## #   pass_rate_from_50 <dbl>

#cors
cor_matrix(item_params[-c(1:2)], CI = .95) %>% write_clipboard()

##                      X
## 1                    1
## 2  -0.97 [-0.98 -0.95]
## 3     0.68 [0.54 0.79]
## 4     0.67 [0.52 0.78]
## 5   -0.19 [-0.40 0.04]
## 6  -0.27 [-0.47 -0.05]
## 7     0.75 [0.63 0.83]
## 8  -0.97 [-0.98 -0.95]
## 9                    1
## 10 -0.61 [-0.73 -0.44]
## 11 -0.60 [-0.73 -0.43]
## 12   0.13 [-0.10 0.35]
## 13    0.22 [0.00 0.43]
## 14 -0.83 [-0.89 -0.74]
## 15    0.68 [0.54 0.79]
## 16 -0.61 [-0.73 -0.44]
## 17                   1
## 18    0.99 [0.98 0.99]
## 19 -0.61 [-0.74 -0.45]
## 20 -0.52 [-0.67 -0.34]
## 21    0.33 [0.11 0.52]
## 22    0.67 [0.52 0.78]
## 23 -0.60 [-0.73 -0.43]
## 24    0.99 [0.98 0.99]
## 25                   1
## 26 -0.64 [-0.76 -0.48]
## 27 -0.56 [-0.70 -0.38]
## 28    0.34 [0.13 0.53]
## 29  -0.19 [-0.40 0.04]
## 30   0.13 [-0.10 0.35]
## 31 -0.61 [-0.74 -0.45]
## 32 -0.64 [-0.76 -0.48]
## 33                   1
## 34    0.86 [0.79 0.91]
## 35  -0.02 [-0.25 0.20]
## 36 -0.27 [-0.47 -0.05]
## 37    0.22 [0.00 0.43]
## 38 -0.52 [-0.67 -0.34]
## 39 -0.56 [-0.70 -0.38]
## 40    0.86 [0.79 0.91]
## 41                   1
## 42  -0.12 [-0.34 0.11]
## 43    0.75 [0.63 0.83]
## 44 -0.83 [-0.89 -0.74]
## 45    0.33 [0.11 0.52]
## 46    0.34 [0.13 0.53]
## 47  -0.02 [-0.25 0.20]
## 48  -0.12 [-0.34 0.11]
## 49                   1

wtd.cors(item_params[-c(1:2)]) %>% .[5, 1:5]

##      pass_rate     difficulty        loading discrimination item_fertility 
##          -0.19           0.13          -0.61          -0.64           1.00

cor(item_params[-c(1:2)], use = "pairwise.complete.obs") %>% .[5, 1:5]

##      pass_rate     difficulty        loading discrimination item_fertility 
##          -0.19           0.13          -0.61          -0.64           1.00

cor(item_params[-c(1:2)], use = "complete.obs") %>% .[5, 1:5]

##      pass_rate     difficulty        loading discrimination item_fertility 
##          -0.19           0.13          -0.61          -0.64           1.00

#corr.test
corr.test(data.frame(item_params$loading, item_params$item_fertility)) %>% print(short=F)

## Call:corr.test(x = data.frame(item_params$loading, item_params$item_fertility))
## Correlation matrix 
##                            item_params.loading item_params.item_fertility
## item_params.loading                       1.00                      -0.61
## item_params.item_fertility               -0.61                       1.00
## Sample Size 
## [1] 75
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                            item_params.loading item_params.item_fertility
## item_params.loading                          0                          0
## item_params.item_fertility                   0                          0
## 
##  Confidence intervals based upon normal theory.  To get bootstrapped values, try cor.ci
##             raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## itm_.-it_._     -0.74 -0.61     -0.45     0     -0.74     -0.45

#score persons
irt_fa_score = scoreIrt(irt_fa, cog_items)

#save and standardize
d$theta = irt_fa_score$theta1 %>% standardize()

#plot
GG_denhist(d, "theta") + theme_large

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

describe(d$theta) %>% as.data.frame()

GG_save("figs/theta_dist.png")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_scatter(item_params, "loading", "item_fertility", case_names = "item") +
  xlab("Item loading") + ylab("Item-fertility latent correlation") +
  theme_large

GG_save("figs/loading_fert.png")
GG_scatter(item_params, "difficulty", "item_fertility", case_names = "item") +
  xlab("Item difficulty") + ylab("Item-fertility latent correlation") +
  theme_large

GG_save("figs/difficulty_fert.png")

#model
lm(item_fertility ~ loading + difficulty, data = item_params) %>% MOD_summary(kfold = F)

## 
##     ---- Model summary ----    
## Model coefficients
##             Beta   SE CI_lower CI_upper
## loading    -0.84 0.11     -1.1    -0.63
## difficulty -0.38 0.11     -0.6    -0.17
## 
## 
## Model meta-data
##          outcome  N df   R2 R2-adj. R2-cv
## 1 item_fertility 75 72 0.47    0.45    NA
## 
## 
## Etas from analysis of variance
##             Eta Eta_partial
## loading    0.67        0.68
## difficulty 0.30        0.38

Person-level analysis

#estimate relationship between g and fertility using polr
(polr_fit = MASS::polr(offspring_ordinal ~ theta, data = d) %>% summary())

## 
## Re-fitting to get Hessian

## Call:
## MASS::polr(formula = offspring_ordinal ~ theta, data = d)
## 
## Coefficients:
##        Value Std. Error t value
## theta -0.189     0.0489   -3.86
## 
## Intercepts:
##     Value   Std. Error t value
## 0|1  -1.072   0.061    -17.501
## 1|2  -0.343   0.054     -6.320
## 2|3   1.539   0.070     22.026
## 3|4   2.799   0.114     24.544
## 4|5   4.162   0.215     19.354
## 5|6   5.880   0.501     11.742
## 
## Residual Deviance: 4122.66 
## AIC: 4136.66

#latent correlation
(latent_cor_fit = polycor::hetcor(d[c("offspring_ordinal", "theta")] %>% as.data.frame()))

## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##                   offspring_ordinal      theta
## offspring_ordinal                 1 Polyserial
## theta                        -0.114          1
## 
## Standard Errors:
## offspring_ordinal             theta 
##                              0.0276 
## Levels:  0.0276
## 
## n = 1409 
## 
## P-values for Tests of Bivariate Normality:
## offspring_ordinal             theta 
##                            2.16e-17 
## Levels:  2.16e-17

#plot to look for nonlinear effects
GG_scatter(d, "theta", "offspring") +
  geom_smooth() +
  theme_large

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#distributions
GG_denhist(d, "theta") + theme_large

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d, "offspring") + theme_large

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Other IRT analyses

#extra IRT models
#using the ltm library
irt_fa_3pl = ltm::tpm(cog_items)
irt_fa_2pl = ltm::tpm(cog_items, constraint = matrix(c(1:75, rep(1, 75), rep(0, 75)), ncol = 3))

#output params
irt_fa_3pl

## 
## Call:
## ltm::tpm(data = cog_items)
## 
## Coefficients:
##          Gussng   Dffclt  Dscrmn
## item_1    0.003   -6.674    0.84
## item_2    0.001   -4.050    1.46
## item_3    0.058   -9.951    0.40
## item_4    0.000   -4.210    1.49
## item_5    0.000   -3.892    1.66
## item_6    0.000   -4.204    1.45
## item_7    0.000   -4.554    1.51
## item_8    0.000   -2.594    2.95
## item_9    0.277   -2.387    3.17
## item_10   0.000   -3.351    1.64
## item_11   0.000   -3.162    1.64
## item_12   0.000   -2.007    1.49
## item_13   0.000   -3.518    1.55
## item_14   0.292   -2.300    2.42
## item_15   0.354   -2.034    1.79
## item_16   0.266   -1.540    2.24
## item_17   0.318   -1.581    3.02
## item_18   0.000   -2.955    1.33
## item_19   0.000   -2.245    1.30
## item_20   0.126   -1.656    1.81
## item_21   0.000   -2.519    1.98
## item_22   0.000   -0.961    1.53
## item_23   0.491   -1.620    1.75
## item_24   0.167   -1.333    2.21
## item_25   0.000   -1.918    1.05
## item_26   0.111   -1.331    2.47
## item_27   0.076   -1.693    1.80
## item_28   0.254   -1.313    1.94
## item_29   0.000   -1.954    1.31
## item_30   0.218   -1.457    2.37
## item_31   0.183   -0.923    1.46
## item_32   0.223   -0.490    1.92
## item_33   0.316    0.212    2.27
## item_34   0.256   -1.001    1.51
## item_35   0.104    0.957    2.28
## item_36   0.329   -0.478    2.08
## item_37   0.056   -0.843    1.66
## item_38   0.132   -0.068    1.65
## item_39   0.000   -1.288    1.24
## item_40   0.220   -0.653    2.10
## item_41   0.053   -0.218    2.17
## item_42   0.341    1.181    1.64
## item_43   0.103    0.265    1.56
## item_44   0.098   -0.157    2.43
## item_45   0.193   -0.528    2.31
## item_46   0.417    0.582    2.22
## item_47   0.141    1.004    0.92
## item_48   0.173    0.716    2.93
## item_49   0.446    1.198    3.85
## item_50   0.052    0.397    2.62
## item_51   0.379    1.312    2.54
## item_52   0.230    1.426    5.12
## item_53   0.209    0.048    2.00
## item_54   0.211    1.071    3.08
## item_55   0.117    0.460    2.57
## item_56   0.247    1.276    3.63
## item_57   0.151    1.116    2.17
## item_58   0.090    1.100    1.70
## item_59   0.112    1.066    2.69
## item_60   0.224    0.769    2.65
## item_61   0.260    0.849    1.93
## item_62   0.198    1.238    2.95
## item_63   0.200    0.853    2.74
## item_64   0.192    1.026    2.39
## item_65   0.132    0.856    1.26
## item_66   0.093    0.777    1.68
## item_67   0.091    1.757    2.66
## item_68   0.127    1.792    1.92
## item_69   0.184    1.420    4.05
## item_70   0.238    1.156    2.55
## item_71   0.197    1.074    3.32
## item_72   0.146    1.691    2.86
## item_73   0.197    1.623    1.81
## item_74   0.144    2.421    1.33
## item_75   0.205    1.761    2.34
## 
## Log.Lik: -41383

irt_fa_2pl

## 
## Call:
## ltm::tpm(data = cog_items, constraint = matrix(c(1:75, rep(1, 
##     75), rep(0, 75)), ncol = 3))
## 
## Coefficients:
##          Gussng   Dffclt  Dscrmn
## item_1        0   -8.148    0.65
## item_2        0   -2.573    2.44
## item_3        0  -10.200    0.39
## item_4        0   -3.137    1.90
## item_5        0   -2.678    2.39
## item_6        0   -2.830    2.17
## item_7        0   -3.022    2.29
## item_8        0   -1.815    4.23
## item_9        0   -1.832    4.19
## item_10       0   -2.337    2.36
## item_11       0   -2.113    2.60
## item_12       0   -1.504    1.96
## item_13       0   -2.456    2.23
## item_14       0   -1.873    3.03
## item_15       0   -1.827    2.22
## item_16       0   -1.437    2.48
## item_17       0   -1.472    3.14
## item_18       0   -2.229    1.71
## item_19       0   -1.704    1.67
## item_20       0   -1.391    2.18
## item_21       0   -1.758    2.99
## item_22       0   -0.755    1.80
## item_23       0   -1.852    1.84
## item_24       0   -1.195    2.52
## item_25       0   -1.587    1.18
## item_26       0   -1.109    3.07
## item_27       0   -1.394    2.13
## item_28       0   -1.294    2.14
## item_29       0   -1.463    1.72
## item_30       0   -1.318    2.68
## item_31       0   -1.000    1.52
## item_32       0   -0.715    1.75
## item_33       0   -0.385    1.28
## item_34       0   -1.172    1.52
## item_35       0    0.971    1.22
## item_36       0   -0.894    1.67
## item_37       0   -0.742    1.84
## item_38       0   -0.234    1.43
## item_39       0   -1.008    1.48
## item_40       0   -0.818    1.95
## item_41       0   -0.225    2.20
## item_42       0    0.303    0.56
## item_43       0    0.132    1.33
## item_44       0   -0.240    2.21
## item_45       0   -0.681    2.08
## item_46       0   -0.470    0.87
## item_47       0    0.754    0.63
## item_48       0    0.504    1.31
## item_49       0   -0.035    0.50
## item_50       0    0.358    2.12
## item_51       0    0.480    0.45
## item_52       0    1.910    0.54
## item_53       0   -0.285    1.42
## item_54       0    0.961    0.85
## item_55       0    0.318    1.66
## item_56       0    1.340    0.60
## item_57       0    1.112    0.93
## item_58       0    1.169    1.02
## item_59       0    1.161    1.15
## item_60       0    0.426    1.09
## item_61       0    0.356    0.84
## item_62       0    1.349    0.73
## item_63       0    0.608    1.09
## item_64       0    0.868    0.92
## item_65       0    0.670    0.88
## item_66       0    0.710    1.21
## item_67       0    2.933    0.68
## item_68       0    2.600    0.59
## item_69       0    2.141    0.57
## item_70       0    0.929    0.77
## item_71       0    1.028    0.89
## item_72       0    3.016    0.50
## item_73       0    2.035    0.49
## item_74       0    4.008    0.36
## item_75       0    2.800    0.40
## 
## Log.Lik: -41827

#add to item params
item_params$discrimination_3pl = irt_fa_3pl$coefficients[, 3]
item_params$discrimination_2pl = irt_fa_2pl$coefficients[, 3]
item_params$difficulty_2pl = irt_fa_2pl$coefficients[, 2] * -1
item_params$difficulty_3pl = irt_fa_3pl$coefficients[, 2]

#cors
cor_matrix(item_params[-c(1:2)], CI = .95) %>% write_clipboard()

##                       X
## 1                     1
## 2   -0.97 [-0.98 -0.95]
## 3      0.68 [0.54 0.79]
## 4      0.67 [0.52 0.78]
## 5    -0.19 [-0.40 0.04]
## 6   -0.27 [-0.47 -0.05]
## 7      0.75 [0.63 0.83]
## 8   -0.44 [-0.61 -0.24]
## 9      0.74 [0.62 0.83]
## 10  -0.93 [-0.96 -0.90]
## 11     0.95 [0.92 0.97]
## 12  -0.97 [-0.98 -0.95]
## 13                    1
## 14  -0.61 [-0.73 -0.44]
## 15  -0.60 [-0.73 -0.43]
## 16    0.13 [-0.10 0.35]
## 17     0.22 [0.00 0.43]
## 18  -0.83 [-0.89 -0.74]
## 19     0.37 [0.16 0.55]
## 20  -0.77 [-0.85 -0.66]
## 21     0.99 [0.99 0.99]
## 22  -0.97 [-0.98 -0.95]
## 23     0.68 [0.54 0.79]
## 24  -0.61 [-0.73 -0.44]
## 25                    1
## 26     0.99 [0.98 0.99]
## 27  -0.61 [-0.74 -0.45]
## 28  -0.52 [-0.67 -0.34]
## 29     0.33 [0.11 0.52]
## 30   -0.23 [-0.43 0.00]
## 31     0.86 [0.78 0.91]
## 32  -0.56 [-0.70 -0.38]
## 33     0.69 [0.55 0.79]
## 34     0.67 [0.52 0.78]
## 35  -0.60 [-0.73 -0.43]
## 36     0.99 [0.98 0.99]
## 37                    1
## 38  -0.64 [-0.76 -0.48]
## 39  -0.56 [-0.70 -0.38]
## 40     0.34 [0.13 0.53]
## 41   -0.16 [-0.37 0.07]
## 42     0.88 [0.82 0.92]
## 43  -0.55 [-0.69 -0.37]
## 44     0.67 [0.53 0.78]
## 45   -0.19 [-0.40 0.04]
## 46    0.13 [-0.10 0.35]
## 47  -0.61 [-0.74 -0.45]
## 48  -0.64 [-0.76 -0.48]
## 49                    1
## 50     0.86 [0.79 0.91]
## 51   -0.02 [-0.25 0.20]
## 52   -0.13 [-0.35 0.10]
## 53  -0.51 [-0.66 -0.32]
## 54    0.10 [-0.13 0.32]
## 55   -0.18 [-0.39 0.05]
## 56  -0.27 [-0.47 -0.05]
## 57     0.22 [0.00 0.43]
## 58  -0.52 [-0.67 -0.34]
## 59  -0.56 [-0.70 -0.38]
## 60     0.86 [0.79 0.91]
## 61                    1
## 62   -0.12 [-0.34 0.11]
## 63   -0.02 [-0.25 0.21]
## 64  -0.45 [-0.61 -0.25]
## 65    0.19 [-0.03 0.40]
## 66  -0.27 [-0.46 -0.04]
## 67     0.75 [0.63 0.83]
## 68  -0.83 [-0.89 -0.74]
## 69     0.33 [0.11 0.52]
## 70     0.34 [0.13 0.53]
## 71   -0.02 [-0.25 0.20]
## 72   -0.12 [-0.34 0.11]
## 73                    1
## 74  -0.31 [-0.50 -0.09]
## 75     0.58 [0.41 0.71]
## 76  -0.83 [-0.89 -0.75]
## 77     0.78 [0.67 0.85]
## 78  -0.44 [-0.61 -0.24]
## 79     0.37 [0.16 0.55]
## 80   -0.23 [-0.43 0.00]
## 81   -0.16 [-0.37 0.07]
## 82   -0.13 [-0.35 0.10]
## 83   -0.02 [-0.25 0.21]
## 84  -0.31 [-0.50 -0.09]
## 85                    1
## 86   -0.05 [-0.28 0.18]
## 87     0.32 [0.10 0.51]
## 88  -0.51 [-0.66 -0.32]
## 89     0.74 [0.62 0.83]
## 90  -0.77 [-0.85 -0.66]
## 91     0.86 [0.78 0.91]
## 92     0.88 [0.82 0.92]
## 93  -0.51 [-0.66 -0.32]
## 94  -0.45 [-0.61 -0.25]
## 95     0.58 [0.41 0.71]
## 96   -0.05 [-0.28 0.18]
## 97                    1
## 98  -0.78 [-0.86 -0.68]
## 99     0.81 [0.71 0.87]
## 100 -0.93 [-0.96 -0.90]
## 101    0.99 [0.99 0.99]
## 102 -0.56 [-0.70 -0.38]
## 103 -0.55 [-0.69 -0.37]
## 104   0.10 [-0.13 0.32]
## 105   0.19 [-0.03 0.40]
## 106 -0.83 [-0.89 -0.75]
## 107    0.32 [0.10 0.51]
## 108 -0.78 [-0.86 -0.68]
## 109                   1
## 110 -0.96 [-0.97 -0.93]
## 111    0.95 [0.92 0.97]
## 112 -0.97 [-0.98 -0.95]
## 113    0.69 [0.55 0.79]
## 114    0.67 [0.53 0.78]
## 115  -0.18 [-0.39 0.05]
## 116 -0.27 [-0.46 -0.04]
## 117    0.78 [0.67 0.85]
## 118 -0.51 [-0.66 -0.32]
## 119    0.81 [0.71 0.87]
## 120 -0.96 [-0.97 -0.93]
## 121                   1

#2PL vs. 3PN discrimination
GG_scatter(item_params, "discrimination", "discrimination_3pl") + theme_large

#nonsense finding!
GG_scatter(item_params, "discrimination", "discrimination_2pl") + theme_large

#fairly close
GG_scatter(item_params, "difficulty", "difficulty_2pl") + theme_large

#main finding with new discrims
GG_scatter(item_params, "discrimination_3pl", "item_fertility") + theme_large

GG_scatter(item_params, "discrimination_2pl", "item_fertility") + theme_large

#model
lm(item_fertility ~ discrimination_2pl + difficulty_2pl, data = item_params) %>% MOD_summary(kfold = F)

## 
##     ---- Model summary ----    
## Model coefficients
##                     Beta   SE CI_lower CI_upper
## discrimination_2pl -1.11 0.14     -1.4    -0.84
## difficulty_2pl     -0.76 0.14     -1.0    -0.49
## 
## 
## Model meta-data
##          outcome  N df   R2 R2-adj. R2-cv
## 1 item_fertility 75 72 0.49    0.47    NA
## 
## 
## Etas from analysis of variance
##                     Eta Eta_partial
## discrimination_2pl 0.69        0.69
## difficulty_2pl     0.48        0.55

##try without items with extreme pass rates (<.05 or >.95
#subset item data
nonextreme_items = is_between(item_params$pass_rate, .05, .95) %>% which

#make list for sub-analysis
no_extreme = list(
  item_count = nonextreme_items %>% length,
  #items
  cog_items = cog_items[nonextreme_items]
)

#irt
no_extreme[["irt_fa"]] = irt.fa(no_extreme$cog_items)

no_extreme[["irt_fa_2f"]] = irt.fa(no_extreme$cog_items, nfactors = 2, plot = F)

## Loading required namespace: GPArotation

## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.

no_extreme[["irt_fa_3f"]] = irt.fa(no_extreme$cog_items, nfactors = 3, plot = F)

## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.

no_extreme[["irt_fa_3pl"]] = ltm::tpm(no_extreme$cog_items)
no_extreme[["irt_fa_2pl"]] = ltm::tpm(no_extreme$cog_items, constraint = matrix(c(1:no_extreme$item_count, rep(1, no_extreme$item_count), rep(0, no_extreme$item_count)), ncol = 3))

#model outputs
no_extreme[["irt_fa"]]

## Item Response Analysis using Factor Analysis 
## 
## Call: irt.fa(x = no_extreme$cog_items)
## Item Response Analysis using Factor Analysis  
## 
##  Summary information by factor and item
##  Factor =  1 
##               -3    -2    -1     0    1    2    3
## item_12     0.21  0.43  0.39  0.16 0.05 0.01 0.00
## item_15     0.31  0.45  0.28  0.10 0.03 0.01 0.00
## item_16     0.29  0.59  0.41  0.13 0.03 0.01 0.00
## item_17     0.49  0.77  0.31  0.06 0.01 0.00 0.00
## item_19     0.18  0.32  0.30  0.16 0.06 0.02 0.01
## item_20     0.25  0.58  0.47  0.15 0.04 0.01 0.00
## item_22     0.08  0.31  0.63  0.40 0.12 0.03 0.01
## item_23     0.23  0.36  0.28  0.13 0.04 0.01 0.00
## item_24     0.24  0.72  0.58  0.15 0.03 0.00 0.00
## item_25     0.12  0.24  0.28  0.20 0.09 0.04 0.01
## item_26     0.29  0.98  0.60  0.11 0.01 0.00 0.00
## item_27     0.23  0.53  0.46  0.16 0.04 0.01 0.00
## item_28     0.20  0.51  0.49  0.19 0.05 0.01 0.00
## item_29     0.17  0.34  0.36  0.19 0.07 0.02 0.01
## item_30     0.33  0.79  0.47  0.11 0.02 0.00 0.00
## item_31     0.10  0.29  0.45  0.31 0.11 0.03 0.01
## item_32     0.08  0.28  0.59  0.42 0.13 0.03 0.01
## item_33     0.06  0.15  0.29  0.31 0.19 0.08 0.03
## item_34     0.12  0.30  0.41  0.26 0.10 0.03 0.01
## item_35     0.02  0.06  0.15  0.27 0.29 0.18 0.08
## item_36     0.10  0.29  0.48  0.33 0.12 0.03 0.01
## item_37     0.08  0.32  0.68  0.42 0.11 0.02 0.00
## item_38     0.04  0.15  0.39  0.47 0.24 0.07 0.02
## item_39     0.10  0.28  0.44  0.30 0.11 0.03 0.01
## item_40     0.09  0.36  0.66  0.37 0.10 0.02 0.00
## item_41     0.02  0.14  0.75  0.94 0.21 0.03 0.00
## item_43     0.03  0.10  0.28  0.44 0.30 0.11 0.03
## item_44     0.02  0.15  0.76  0.91 0.21 0.03 0.00
## item_45     0.07  0.34  0.79  0.45 0.10 0.02 0.00
## item_46     0.06  0.10  0.15  0.16 0.12 0.08 0.04
## item_47     0.04  0.07  0.09  0.11 0.10 0.08 0.05
## item_48     0.03  0.08  0.21  0.36 0.32 0.15 0.05
## item_50     0.00  0.04  0.24  0.90 0.67 0.13 0.02
## item_53     0.05  0.15  0.37  0.43 0.22 0.07 0.02
## item_54     0.04  0.07  0.11  0.14 0.13 0.10 0.06
## item_55     0.02  0.07  0.27  0.60 0.43 0.13 0.03
## item_57     0.03  0.07  0.12  0.17 0.17 0.12 0.07
## item_58     0.03  0.06  0.13  0.21 0.22 0.15 0.08
## item_59     0.02  0.06  0.14  0.25 0.28 0.18 0.08
## item_60     0.04  0.09  0.19  0.28 0.24 0.13 0.05
## item_61     0.04  0.08  0.13  0.16 0.15 0.10 0.05
## item_62     0.03  0.05  0.08  0.09 0.09 0.08 0.06
## item_63     0.03  0.08  0.17  0.26 0.24 0.14 0.06
## item_64     0.04  0.07  0.12  0.17 0.16 0.12 0.06
## item_65     0.04  0.08  0.15  0.20 0.19 0.12 0.06
## item_66     0.02  0.07  0.19  0.36 0.34 0.17 0.06
## item_70     0.04  0.07  0.10  0.12 0.12 0.09 0.06
## item_71     0.04  0.07  0.11  0.14 0.14 0.11 0.07
## Test Info   5.18 12.56 16.48 13.69 7.34 3.15 1.30
## SEM         0.44  0.28  0.25  0.27 0.37 0.56 0.88
## Reliability 0.81  0.92  0.94  0.93 0.86 0.68 0.23
## 
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## 
## Test of the hypothesis that 1 factor is sufficient.
## The degrees of freedom for the model is 1710  and the objective function was  19 
## The number of observations was  1409  with Chi Square =  26881  with prob <  0 
## 
## The root mean square of the residuals (RMSA) is  0.08 
## The df corrected root mean square of the residuals is  0.08 
## 
## Tucker Lewis Index of factoring reliability =  0.51
## RMSEA index =  0.1  and the 10 % confidence intervals are  0.1 0.1
## BIC =  14482

no_extreme[["irt_fa_2f"]]

## Item Response Analysis using Factor Analysis 
## 
## Call: irt.fa(x = no_extreme$cog_items, nfactors = 2, plot = F)
## 
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 1651  and the objective function was  15 
## The number of observations was  1409  with Chi Square =  21261  with prob <  0 
## 
## The root mean square of the residuals (RMSA) is  0.05 
## The df corrected root mean square of the residuals is  0.05 
## 
## Tucker Lewis Index of factoring reliability =  0.6
## RMSEA index =  0.093  and the 10 % confidence intervals are  0.091 0.093
## BIC =  9290
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.48
## MR2 0.48 1.00

no_extreme[["irt_fa_3f"]]

## Item Response Analysis using Factor Analysis 
## 
## Call: irt.fa(x = no_extreme$cog_items, nfactors = 3, plot = F)
## 
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## 
## Test of the hypothesis that 3 factors are sufficient.
## The degrees of freedom for the model is 1593  and the objective function was  14 
## The number of observations was  1409  with Chi Square =  19941  with prob <  0 
## 
## The root mean square of the residuals (RMSA) is  0.04 
## The df corrected root mean square of the residuals is  0.04 
## 
## Tucker Lewis Index of factoring reliability =  0.61
## RMSEA index =  0.091  and the 10 % confidence intervals are  0.089 0.092
## BIC =  8391
##  With factor correlations of 
##      MR1  MR2  MR3
## MR1 1.00 0.42 0.45
## MR2 0.42 1.00 0.45
## MR3 0.45 0.45 1.00

#item params
no_extreme$item_params = data_frame(
  item = names(no_extreme$cog_items),
  word = fertility_data$Word[nonextreme_items],
  pass_rate = no_extreme$cog_items %>% colMeans(),
  difficulty = no_extreme$irt_fa$irt$difficulty[[1]],
  loading = no_extreme$irt_fa$fa$loadings %>% as.vector(),
  discrimination = no_extreme$irt_fa$irt$discrimination %>% as.vector,
  discrimination_2pl = no_extreme$irt_fa_2pl$coefficients[, 3],
  discrimination_3pl = no_extreme$irt_fa_3pl$coefficients[, 3]
)

#item x fertility relations
no_extreme$item_params$item_fertility = map_dbl(no_extreme$item_params$item, function(x) {
  #extract and calculate and extract
  d[c(x, "offspring")] %>% 
    #as normal data frame
    as.data.frame %>%
    #correlate
    polycor::hetcor() %>% 
    #extract r
    .[[1]] %>% .[1, 2]
})


#standard errors
no_extreme$item_params$item_fertility_se = map_dbl(no_extreme$item_params$item, function(x) {
  #extract and calculate and extract
  d[c(x, "offspring")] %>% 
    #as normal data frame
    as.data.frame() %>%
    #correlate
    polycor::hetcor() %>% 
    #extract r
    .[["std.errors"]] %>% .[1, 2]
})

#cors
cor_matrix(no_extreme$item_params[-c(1:2)], CI = .95)

##                    pass_rate             difficulty           
## pass_rate          "1"                   "-0.99 [-0.99 -0.98]"
## difficulty         "-0.99 [-0.99 -0.98]" "1"                  
## loading            "0.72 [0.57 0.82]"    "-0.69 [-0.81 -0.54]"
## discrimination     "0.70 [0.54 0.81]"    "-0.68 [-0.79 -0.51]"
## discrimination_2pl "0.80 [0.69 0.88]"    "-0.83 [-0.90 -0.73]"
## discrimination_3pl "-0.42 [-0.61 -0.19]" "0.38 [0.14 0.58]"   
## item_fertility     "-0.49 [-0.66 -0.27]" "0.49 [0.27 0.66]"   
## item_fertility_se  "-0.44 [-0.62 -0.21]" "0.44 [0.21 0.62]"   
##                    loading               discrimination       
## pass_rate          "0.72 [0.57 0.82]"    "0.70 [0.54 0.81]"   
## difficulty         "-0.69 [-0.81 -0.54]" "-0.68 [-0.79 -0.51]"
## loading            "1"                   "0.98 [0.97 0.99]"   
## discrimination     "0.98 [0.97 0.99]"    "1"                  
## discrimination_2pl "0.90 [0.84 0.94]"    "0.92 [0.87 0.95]"   
## discrimination_3pl "-0.35 [-0.56 -0.11]" "-0.30 [-0.51 -0.05]"
## item_fertility     "-0.63 [-0.76 -0.45]" "-0.63 [-0.76 -0.45]"
## item_fertility_se  "-0.53 [-0.69 -0.31]" "-0.54 [-0.70 -0.33]"
##                    discrimination_2pl    discrimination_3pl   
## pass_rate          "0.80 [0.69 0.88]"    "-0.42 [-0.61 -0.19]"
## difficulty         "-0.83 [-0.90 -0.73]" "0.38 [0.14 0.58]"   
## loading            "0.90 [0.84 0.94]"    "-0.35 [-0.56 -0.11]"
## discrimination     "0.92 [0.87 0.95]"    "-0.30 [-0.51 -0.05]"
## discrimination_2pl "1"                   "-0.24 [-0.46 0.02]" 
## discrimination_3pl "-0.24 [-0.46 0.02]"  "1"                  
## item_fertility     "-0.61 [-0.75 -0.43]" "0.16 [-0.09 0.40]"  
## item_fertility_se  "-0.53 [-0.69 -0.33]" "0.15 [-0.11 0.39]"  
##                    item_fertility        item_fertility_se    
## pass_rate          "-0.49 [-0.66 -0.27]" "-0.44 [-0.62 -0.21]"
## difficulty         "0.49 [0.27 0.66]"    "0.44 [0.21 0.62]"   
## loading            "-0.63 [-0.76 -0.45]" "-0.53 [-0.69 -0.31]"
## discrimination     "-0.63 [-0.76 -0.45]" "-0.54 [-0.70 -0.33]"
## discrimination_2pl "-0.61 [-0.75 -0.43]" "-0.53 [-0.69 -0.33]"
## discrimination_3pl "0.16 [-0.09 0.40]"   "0.15 [-0.11 0.39]"  
## item_fertility     "1"                   "0.90 [0.84 0.94]"   
## item_fertility_se  "0.90 [0.84 0.94]"    "1"

#plots
GG_scatter(no_extreme$item_params, "loading", "item_fertility", case_names = "item") + theme_large

GG_save("figs/ne_loading_fert.png")
GG_scatter(no_extreme$item_params, "difficulty", "item_fertility", case_names = "item") + theme_large

GG_save("figs/ne_difficulty_fert.png")

#model
lm(item_fertility ~ loading + difficulty, data = no_extreme$item_params) %>% MOD_summary(kfold = F)

## 
##     ---- Model summary ----    
## Model coefficients
##              Beta   SE CI_lower CI_upper
## loading    -0.561 0.14    -0.85    -0.28
## difficulty  0.097 0.14    -0.19     0.38
## 
## 
## Model meta-data
##          outcome  N df  R2 R2-adj. R2-cv
## 1 item_fertility 60 57 0.4    0.38    NA
## 
## 
## Etas from analysis of variance
##             Eta Eta_partial
## loading    0.40        0.46
## difficulty 0.07        0.09

Output data

Output summary stats for reuse.

#main
write_csv(item_params, "data/item_params.csv", na = "")
write_rds(item_params, "data/item_params.rds")

#no extreme items
write_csv(no_extreme$item_params, "data/item_params_no_extreme.csv", na = "")
write_rds(no_extreme$item_params, "data/item_params_no_extreme.rds")

Selection against vocabulary ability is (mostly) on g

A (methodologically sound) application of the method of correlated vectors to a large vocabulary item-pool

Init

Data

Main IRT analysis

Person-level analysis

Other IRT analyses

Output data