load("data/CDI-III-Spanish.Rdata")
load("data/CDI-III-English.Rdata")
bad_Ss = which(rowSums(en_voc, na.rm=T)==0)
# 2 subjects with no correct items; can't estimate
# weird values: one "3", one "11", and one "12"; replace with NAs for now
#table(unlist(en_voc))
which(apply(en_voc, 1, max)>1)
## [1] 152 154
en_voc[152, which(en_voc[152,]==max(en_voc[152,])) ] = NA # somersault
en_voc[154, which(en_voc[154,]==max(en_voc[154,])) ] = NA # peculiar
en_voc[which(en_voc$because>1),]$because = NA
# sweep up any remaining stray 11/12 values column by column
# (indexing the data frame with linear positions from unlist() would not do this correctly)
for (j in seq_along(en_voc)) en_voc[[j]][en_voc[[j]] %in% c(11, 12)] <- NA
#sort(colSums(en_voc, na.rm=T))
# item-level data for Philip
#bind_cols(en_demo, en_voc) %>% write_csv(file="data/English-item-level-data.csv")
en_voc = en_voc[-bad_Ss,]
en_demo = en_demo[-bad_Ss,]
en_grm <- en_grm[-bad_Ss,]
Two English participants did not know any of the words, so we can’t fit the models with them. Everybody remaining in the sample knows CRACKER, and nobody in the sample knows THEY, so we prune those items.
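The pruning of these constant items is not shown explicitly here (only a commented select(-THEY) appears further below); a minimal sketch of how it might be done:

# sketch: drop items that everyone or no one knows, since they carry no information for the IRT fits
p_known <- colMeans(en_voc, na.rm=T)
constant_items <- names(p_known)[p_known %in% c(0, 1)]  # e.g., CRACKER and THEY
en_voc <- en_voc[, !(colnames(en_voc) %in% constant_items)]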
en_1pl <- mirt(en_voc, 1, 'Rasch', verbose=T)
en_2pl <- mirt(en_voc, 1, '2PL', verbose=T)
# anova(en_1pl, en_2pl) # AIC prefers 2PL, BIC prefers 1PL
cdat <- data.frame(age_sc = scale(en_demo$Age))
en_1pl_age <- mirt(en_voc, 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
# try a 2d model:
en_2pl_2d <- mirt(en_voc, 2, '2PL', verbose=T) # did not converge within 500 EM cycles
# anova(en_2pl, en_2pl_2d) # AIC prefers 2D, BIC prefers 1D
save(en_1pl, en_2pl, en_1pl_age, en_2pl_2d, file="data/EN_1pl_2pl.Rdata")
Now we refit the models using only the 113 participants ages 30-37 months (the intended CDI-III age range).
age30to37 <- which(en_demo$Age>29 & en_demo$Age<38)
en_1pl <- mirt(en_voc[age30to37,], 1, 'Rasch', verbose=T)
en_2pl <- mirt(en_voc[age30to37,], 1, '2PL', verbose=T)
cdat <- data.frame(age_sc = scale(en_demo[age30to37,]$Age))
en_1pl_age <- mirt(en_voc[age30to37,], 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
save(en_1pl, en_2pl, en_1pl_age, file="data/EN_1pl_2pl_30-37mos.Rdata")
The 1PL item parameters for the full vs. limited age range are strongly correlated (\(r\) = 0.994) and the relationship looks homoscedastic, which is comforting.
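The comparison itself is not shown; a minimal sketch, assuming the full-age-range fit is kept under a separate name (en_1pl_full, an assumed object) alongside the 30-37-month refit:

# sketch: correlate the 1PL difficulty parameters from the two fits
b_full <- coef(en_1pl_full, simplify=T, IRTpars=T)$items[, "b"]  # full age range (assumed name)
b_3037 <- coef(en_1pl, simplify=T, IRTpars=T)$items[, "b"]       # 30-37-month fit
cor(b_full, b_3037)    # reported above as r = 0.994
plot(b_full, b_3037)   # check homoscedasticity by eye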
#en_voc <- en_voc %>% select(-THEY)
load("data/EN_1pl_2pl.Rdata")
anova(en_1pl, en_2pl)
##
## Model 1: mirt(data = en_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_voc, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 15548.68 15710.91 15575.38 15688.59 15895.48 -7673.339 NaN NaN NaN
## 2 15347.96 18219.38 15400.83 15625.01 16034.70 -7473.978 398.722 99 0
For English, AIC prefers the 2PL model and BIC prefers the Rasch (1PL) model.
anova(en_1pl, en_1pl_age)
##
## Model 1: mirt(data = en_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_voc, model = 1, itemtype = "Rasch", covdata = cdat,
## formula = ~age_sc, verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 15548.68 15710.91 15575.38 15688.59 15895.48 -7673.339 NaN NaN NaN
## 2 15488.55 15655.31 15515.51 15629.84 15838.79 -7642.275 62.128 1 0
AIC and BIC both prefer the age model; we should consider using it, but for now we stick with the plain Rasch model. The ability estimates from the 1PL and the 1PL-with-age model are strongly correlated, as shown below.
en_demo <- en_demo %>%
mutate(theta1pl = fscores(en_1pl)[,"F1"],
theta1pl_age = fscores(en_1pl_age)[,"F1"])
en_demo %>% ggplot(aes(x=theta1pl, y=theta1pl_age, color=Age)) +
geom_point() + theme_bw()
Age coefficient: 3.72 (latent regression \(\beta\) = 1.1).
Our next goal is to determine whether all items should be included in the item bank; items with very bad properties should probably be dropped. We first prune any ill-fitting items (S_X2 p < .01) from the full 1PL model.
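A minimal sketch of this pruning step (the exact filtering code is not shown in this section):

# sketch: flag ill-fitting items with the S_X2 statistic from mirt::itemfit
fits <- itemfit(en_1pl)   # returns item, S_X2, df.S_X2, RMSEA.S_X2, p.S_X2
bad_items <- as.character(fits$item[fits$p.S_X2 < .01])
bad_items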
Five items did not fit well in the full 2PL model; these items are listed below.
donkey, sneeze, football, their, microscope
Below we show item information plots for these bad items.
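The plots themselves are not reproduced here; a sketch of how they might be generated with mirt's plot method (which fitted model was used for the figures is an assumption):

# sketch: item information curves for the flagged items
bad_items <- c("donkey", "sneeze", "football", "their", "microscope")
plot(en_2pl, type="infotrace", which.items = match(bad_items, colnames(en_voc)))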
Why are these items bad? Some have fairly low endorsement rates (microscope and their), while another has a very high rate (football).
## microscope their donkey sneeze football
## 0.16 0.19 0.66 0.68 0.89
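The linear mixed models summarized below predict total vocabulary and 1PL ability from Sex, scaled Age, and their interaction, with a random intercept per child. A minimal sketch of how these fits might be produced, assuming en_demo_sc holds the scaled demographics and ability estimates:

library(lmerTest)  # Satterthwaite t-tests, as in the summaries below
m_vocab <- lmer(TotalVocab ~ Sex * Age_sc + (1 | ID), data = en_demo_sc)
m_theta <- lmer(theta1pl ~ Sex * Age_sc + (1 | ID), data = en_demo_sc)
summary(m_vocab)
summary(m_theta)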
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: TotalVocab ~ Sex * Age_sc + (1 | ID)
## Data: en_demo_sc
##
## REML criterion at convergence: 2025.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.52172 -0.29341 0.08585 0.36622 1.08867
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 322.2 17.95
## Residual 103.1 10.16
## Number of obs: 229, groups: ID, 228
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 72.837458 1.959275 224.165880 37.176 < 2e-16 ***
## SexM -5.856561 2.733561 223.892322 -2.142 0.0332 *
## Age_sc 2.271520 0.368202 224.165880 6.169 3.17e-09 ***
## SexM:Age_sc -0.004623 0.513052 222.733821 -0.009 0.9928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SexM Age_sc
## SexM -0.717
## Age_sc -0.041 0.029
## SexM:Age_sc 0.029 -0.009 -0.718
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: theta1pl ~ Sex * Age_sc + (1 | ID)
## Data: en_demo_sc
##
## REML criterion at convergence: 954.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.99483 -0.26417 -0.00905 0.24282 0.90974
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.1507 1.775
## Residual 0.5084 0.713
## Number of obs: 229, groups: ID, 228
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.31407 0.18171 224.03186 1.728 0.0853 .
## SexM -0.55250 0.25356 223.88498 -2.179 0.0304 *
## Age_sc 0.20625 0.03415 224.03186 6.040 6.36e-09 ***
## SexM:Age_sc -0.01803 0.04761 223.33438 -0.379 0.7053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SexM Age_sc
## SexM -0.717
## Age_sc -0.041 0.029
## SexM:Age_sc 0.029 -0.010 -0.717
Let’s try fitting an IRT model on vocabulary, language complexity, and language use items all together, and test for multidimensionality.
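The multidimensional fits loaded below were estimated elsewhere; a minimal sketch of how they might be produced, assuming en_all is the combined item matrix and enm is a list of exploratory 2PL fits with 1-3 factors (object and file names follow the calls echoed in the anova output):

# sketch only: unidimensional Rasch plus exploratory 1-3 factor 2PL fits
en_1pl_all <- mirt(en_all, 1, 'Rasch', verbose=T)
enm <- list(mirt(en_all, 1, '2PL', verbose=T),
            mirt(en_all, 2, '2PL', verbose=T),
            mirt(en_all, 3, '2PL', verbose=T, technical=list(NCYCLES=1000)))
save(en_1pl_all, enm, en_2pl_3f, file="data/EN_multidim_fits.Rdata")  # en_2pl_3f is sketched further below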
load("data/EN_multidim_fits.Rdata")
anova(en_1pl_all, enm[[1]]) # 2PL preferred by all metrics
##
## Model 1: mirt(data = en_all, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_all, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 21822.70 22128.52 21855.74 21995.85 22251.91 -10786.35 NaN NaN NaN
## 2 20634.54 14459.34 20700.10 20978.08 21486.10 -10069.27 1434.156 123 0
anova(enm[[1]], enm[[2]]) # 2d preferred by all metrics
##
## Model 1: mirt(data = en_all, model = 1, itemtype = "2PL", verbose = T)
## Model 2: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 20634.54 14459.34 20700.10 20978.08 21486.10 -10069.271 NaN NaN NaN
## 2 19733.31 17803.08 19831.39 20247.24 21007.22 -9495.657 1147.228 123 0
anova(enm[[2]], enm[[3]]) # AIC prefers 3d, BIC prefers 2d
##
## Model 1: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## Model 2: mirt(data = en_all, model = 3, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 19733.31 17803.08 19831.39 20247.24 21007.22 -9495.657 NaN NaN NaN
## 2 19540.47 17702.41 19670.80 20223.39 21233.29 -9277.233 436.848 122 0
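The confirmatory model en_2pl_3f compared next assigns vocabulary, complexity, and language-use items to three correlated factors. A sketch of what such a mirt.model specification might look like; the item index ranges below are assumptions, not the actual layout:

# assumed layout: items 1-100 vocabulary, 101-112 complexity, 113-124 language use
mod_2pl_3f <- mirt.model('
  vocab   = 1-100
  COMPLEX = 101-112
  USELANG = 113-124
  COV     = vocab*COMPLEX*USELANG')
en_2pl_3f <- mirt(en_all, mod_2pl_3f, itemtype='2PL', verbose=T,
                  technical=list(NCYCLES=1000))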
anova(enm[[2]], en_2pl_3f) # BIC prefers the confirmatory 3-factor model; AIC prefers the exploratory 2-factor
##
## Model 1: mirt(data = en_all, model = mod_2pl_3f, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## Model 2: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC DIC logLik logPost
## 1 20033.12 13857.92 20098.68 20376.66 20884.68 20590.06 -9768.560 -10047.031
## 2 19733.31 17803.08 19831.39 20247.24 21007.22 19733.31 -9495.657 -9495.657
## df Bayes_Factor
## 1 NaN NA
## 2 123 0
anova(enm[[3]], en_2pl_3f) # BIC prefers the confirmatory 3-factor model; AIC prefers the exploratory 3-factor
##
## Model 1: mirt(data = en_all, model = mod_2pl_3f, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## Model 2: mirt(data = en_all, model = 3, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## AIC AICc SABIC HQ BIC DIC logLik logPost
## 1 20033.12 13857.92 20098.68 20376.66 20884.68 20590.06 -9768.560 -10047.031
## 2 19540.47 17702.41 19670.80 20223.39 21233.29 19540.47 -9277.233 -9277.233
## df Bayes_Factor
## 1 NaN NA
## 2 245 0
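The f2loads object is not constructed in this section; a sketch of how it might be built from the exploratory 2-factor fit, assuming an item metadata table (en_item_info, a hypothetical name) that maps each definition to its category:

# sketch: rotated loadings from the 2-factor model, joined to item categories
loads <- as.data.frame(summary(enm[[2]], rotate="oblimin")$rotF)  # columns F1, F2
loads$definition <- rownames(loads)
f2loads <- left_join(loads, en_item_info, by="definition")  # adds the 'category' column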
f2loads %>% group_by(category) %>%
summarise(F1mean = mean(F1), F2mean = mean(F2),
F1sd = sd(F1), F2sd = sd(F2))
## # A tibble: 3 x 5
## category F1mean F2mean F1sd F2sd
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 COMPLEX -0.798 0.0965 0.0270 0.0618
## 2 USELANG 0.00320 0.599 0.137 0.382
## 3 vocab -0.750 0.00787 0.107 0.219
sp_1pl <- mirt(sp_voc, 1, 'Rasch', verbose=T)
sp_2pl <- mirt(sp_voc, 1, '2PL', verbose=T)
cdat <- data.frame(age_sc = scale(sp_demo$Age))
sp_1pl_age <- mirt(sp_voc, 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
save(sp_1pl, sp_2pl, sp_1pl_age, file="data/SP_1pl_2pl.Rdata")
load("data/SP_1pl_2pl.Rdata")
anova(sp_1pl, sp_2pl)
##
## Model 1: mirt(data = sp_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = sp_voc, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 55906.65 55951.45 56023.51 56077.45 56344.14 -27852.33 NaN NaN NaN
## 2 55765.71 55988.43 55997.11 56103.93 56632.01 -27682.85 338.944 99 0
AIC prefers the 2PL model, but BIC prefers the Rasch model.
anova(sp_1pl, sp_1pl_age) # AIC and BIC both prefer the age model
##
## Model 1: mirt(data = sp_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = sp_voc, model = 1, itemtype = "Rasch", covdata = cdat,
## formula = ~age_sc, verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 55906.65 55951.45 56023.51 56077.45 56344.14 -27852.33 NaN NaN NaN
## 2 55839.45 55885.23 55957.46 56011.94 56281.26 -27817.72 69.205 1 0
sp_demo <- sp_demo %>%
mutate(theta1pl = fscores(sp_1pl)[,"F1"],
theta1pl_age = fscores(sp_1pl_age)[,"F1"])
sp_demo %>% ggplot(aes(x=theta1pl, y=theta1pl_age, color=Age)) +
geom_point() + theme_bw()
Age coefficient: 2.16 (latent regression \(\beta\) = 0.54).
Our next goal is to determine whether all items should be included in the Spanish item bank; items with very bad properties should probably be dropped. We first prune any ill-fitting items (S_X2 p < .01) from the 1PL model.
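The same pruning sketch as for English applies, here to the Spanish 1PL fit:

# sketch: flag ill-fitting Spanish items with S_X2
sp_fits <- itemfit(sp_1pl)
as.character(sp_fits$item[sp_fits$p.S_X2 < .01])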
Seven items did not fit well in the full 2PL model; these items are listed below.
grúa, apestoso/a, nido, lado, dentista, nunca, disparar
Below we show item information plots for these bad items.