load("data/CDI-III-Spanish.Rdata")
load("data/CDI-III-English.Rdata")
bad_Ss = which(rowSums(en_voc, na.rm=T)==0)
# 2 subjects with no correct items; can't estimate
# weird values: one "3", one "11", and one "12"; replace with NAs for now
#table(unlist(en_voc))
which(apply(en_voc, 1, max)>1)
## [1] 152 154
en_voc[152, which(en_voc[152,]==max(en_voc[152,])) ] = NA # somersault
en_voc[154, which(en_voc[154,]==max(en_voc[154,])) ] = NA # peculiar
en_voc[which(en_voc$because>1),]$because = NA
# sweep up any remaining stray 11/12 values column by column
# (indexing the data frame with linear positions from unlist() would not do this correctly)
for (j in seq_along(en_voc)) en_voc[[j]][en_voc[[j]] %in% c(11, 12)] <- NA
#sort(colSums(en_voc, na.rm=T))
# item-level data for Philip
#bind_cols(en_demo, en_voc) %>% write_csv(file="data/English-item-level-data.csv")
en_voc = en_voc[-bad_Ss,]
en_demo = en_demo[-bad_Ss,]
en_grm <- en_grm[-bad_Ss,]
Two English participants did not know any of the words, so we can’t fit the models with them. Everybody remaining in the sample knows CRACKER, and nobody in the sample knows THEY, so we prune those items.
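The pruning of these constant items is not shown explicitly here (only a commented select(-THEY) appears further below); a minimal sketch of how it might be done:

# sketch: drop items that everyone or no one knows, since they carry no information for the IRT fits
p_known <- colMeans(en_voc, na.rm=T)
constant_items <- names(p_known)[p_known %in% c(0, 1)]  # e.g., CRACKER and THEY
en_voc <- en_voc[, !(colnames(en_voc) %in% constant_items)]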
en_1pl <- mirt(en_voc, 1, 'Rasch', verbose=T)
en_2pl <- mirt(en_voc, 1, '2PL', verbose=T)
# anova(en_1pl, en_2pl) # AIC prefers 2PL, BIC prefers 1PL
cdat <- data.frame(age_sc = scale(en_demo$Age))
en_1pl_age <- mirt(en_voc, 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
# try a 2d model:
en_2pl_2d <- mirt(en_voc, 2, '2PL', verbose=T) # did not converge within 500 EM cycles
# anova(en_2pl, en_2pl_2d) # AIC prefers 2D, BIC prefers 1D
save(en_1pl, en_2pl, en_1pl_age, en_2pl_2d, file="data/EN_1pl_2pl.Rdata")
Now we refit the models using only the 113 participants ages 30-37 months (the intended CDI-III age range).
age30to37 <- which(en_demo$Age>29 & en_demo$Age<38)
en_1pl <- mirt(en_voc[age30to37,], 1, 'Rasch', verbose=T)
en_2pl <- mirt(en_voc[age30to37,], 1, '2PL', verbose=T)
cdat <- data.frame(age_sc = scale(en_demo[age30to37,]$Age))
en_1pl_age <- mirt(en_voc[age30to37,], 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
save(en_1pl, en_2pl, en_1pl_age, file="data/EN_1pl_2pl_30-37mos.Rdata")
The 1PL item parameters for the full vs. limited age range are strongly correlated (\(r\) = 0.994) and the relationship looks homoscedastic, which is comforting.
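The comparison itself is not shown; a minimal sketch, assuming the full-age-range fit is kept under a separate name (en_1pl_full, an assumed object) alongside the 30-37-month refit:

# sketch: correlate the 1PL difficulty parameters from the two fits
b_full <- coef(en_1pl_full, simplify=T, IRTpars=T)$items[, "b"]  # full age range (assumed name)
b_3037 <- coef(en_1pl, simplify=T, IRTpars=T)$items[, "b"]       # 30-37-month fit
cor(b_full, b_3037)    # reported above as r = 0.994
plot(b_full, b_3037)   # check homoscedasticity by eye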
#en_voc <- en_voc %>% select(-THEY)
load("data/EN_1pl_2pl.Rdata")
anova(en_1pl, en_2pl)
##
## Model 1: mirt(data = en_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_voc, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 15548.68 15710.91 15575.38 15688.59 15895.48 -7673.339 NaN NaN NaN
## 2 15347.96 18219.38 15400.83 15625.01 16034.70 -7473.978 398.722 99 0
For English, AIC prefers the 2PL model and BIC prefers the Rasch (1PL) model.
anova(en_1pl, en_1pl_age)
##
## Model 1: mirt(data = en_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_voc, model = 1, itemtype = "Rasch", covdata = cdat,
## formula = ~age_sc, verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 15548.68 15710.91 15575.38 15688.59 15895.48 -7673.339 NaN NaN NaN
## 2 15488.55 15655.31 15515.51 15629.84 15838.79 -7642.275 62.128 1 0
AIC and BIC both prefer the age model; we should consider using it, but for now we stick with the plain Rasch model. The ability estimates from the 1PL and the 1PL-with-age model are strongly correlated, as shown below.
en_demo <- en_demo %>%
mutate(theta1pl = fscores(en_1pl)[,"F1"],
theta1pl_age = fscores(en_1pl_age)[,"F1"])
en_demo %>% ggplot(aes(x=theta1pl, y=theta1pl_age, color=Age)) +
geom_point() + theme_bw()
Age coefficient: 3.72 (latent regression \(\beta\) = 1.1).
Our next goal is to determine whether all items should be included in the item bank; items with very bad properties should probably be dropped. We first prune any ill-fitting items (S_X2 p < .01) from the full 1PL model.
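A minimal sketch of this pruning step (the exact filtering code is not shown in this section):

# sketch: flag ill-fitting items with the S_X2 statistic from mirt::itemfit
fits <- itemfit(en_1pl)   # returns item, S_X2, df.S_X2, RMSEA.S_X2, p.S_X2
bad_items <- as.character(fits$item[fits$p.S_X2 < .01])
bad_items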
Five items did not fit well in the full 2PL model; these items are listed below.
donkey, sneeze, football, their, microscope
Below we show item information plots for these bad items.
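The plots themselves are not reproduced here; a sketch of how they might be generated with mirt's plot method (which fitted model was used for the figures is an assumption):

# sketch: item information curves for the flagged items
bad_items <- c("donkey", "sneeze", "football", "their", "microscope")
plot(en_2pl, type="infotrace", which.items = match(bad_items, colnames(en_voc)))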
Why are these items bad? Some have fairly low endorsement rates (microscope and their), while another has a very high rate (football).
## microscope their donkey sneeze football
## 0.16 0.19 0.66 0.68 0.89
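The linear mixed models summarized below predict total vocabulary and 1PL ability from Sex, scaled Age, and their interaction, with a random intercept per child. A minimal sketch of how these fits might be produced, assuming en_demo_sc holds the scaled demographics and ability estimates:

library(lmerTest)  # Satterthwaite t-tests, as in the summaries below
m_vocab <- lmer(TotalVocab ~ Sex * Age_sc + (1 | ID), data = en_demo_sc)
m_theta <- lmer(theta1pl ~ Sex * Age_sc + (1 | ID), data = en_demo_sc)
summary(m_vocab)
summary(m_theta)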
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: TotalVocab ~ Sex * Age_sc + (1 | ID)
## Data: en_demo_sc
##
## REML criterion at convergence: 2025.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.52172 -0.29341 0.08585 0.36622 1.08867
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 322.2 17.95
## Residual 103.1 10.16
## Number of obs: 229, groups: ID, 228
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 72.837458 1.959275 224.165880 37.176 < 2e-16 ***
## SexM -5.856561 2.733561 223.892322 -2.142 0.0332 *
## Age_sc 2.271520 0.368202 224.165880 6.169 3.17e-09 ***
## SexM:Age_sc -0.004623 0.513052 222.733821 -0.009 0.9928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SexM Age_sc
## SexM -0.717
## Age_sc -0.041 0.029
## SexM:Age_sc 0.029 -0.009 -0.718
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: theta1pl ~ Sex * Age_sc + (1 | ID)
## Data: en_demo_sc
##
## REML criterion at convergence: 954.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.99483 -0.26417 -0.00905 0.24282 0.90974
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.1507 1.775
## Residual 0.5084 0.713
## Number of obs: 229, groups: ID, 228
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.31407 0.18171 224.03186 1.728 0.0853 .
## SexM -0.55250 0.25356 223.88498 -2.179 0.0304 *
## Age_sc 0.20625 0.03415 224.03186 6.040 6.36e-09 ***
## SexM:Age_sc -0.01803 0.04761 223.33438 -0.379 0.7053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SexM Age_sc
## SexM -0.717
## Age_sc -0.041 0.029
## SexM:Age_sc 0.029 -0.010 -0.717
Let’s try fitting an IRT model on vocabulary, language complexity, and language use items all together, and test for multidimensionality.
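The multidimensional fits loaded below were estimated elsewhere; a minimal sketch of how they might be produced, assuming en_all is the combined item matrix and enm is a list of exploratory 2PL fits with 1-3 factors (object and file names follow the calls echoed in the anova output):

# sketch only: unidimensional Rasch plus exploratory 1-3 factor 2PL fits
en_1pl_all <- mirt(en_all, 1, 'Rasch', verbose=T)
enm <- list(mirt(en_all, 1, '2PL', verbose=T),
            mirt(en_all, 2, '2PL', verbose=T),
            mirt(en_all, 3, '2PL', verbose=T, technical=list(NCYCLES=1000)))
save(en_1pl_all, enm, en_2pl_3f, file="data/EN_multidim_fits.Rdata")  # en_2pl_3f is sketched further below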
load("data/EN_multidim_fits.Rdata")
anova(en_1pl_all, enm[[1]]) # 2PL preferred by all metrics
##
## Model 1: mirt(data = en_all, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = en_all, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 21822.70 22128.52 21855.74 21995.85 22251.91 -10786.35 NaN NaN NaN
## 2 20634.54 14459.34 20700.10 20978.08 21486.10 -10069.27 1434.156 123 0
anova(enm[[1]], enm[[2]]) # 2d preferred by all metrics
##
## Model 1: mirt(data = en_all, model = 1, itemtype = "2PL", verbose = T)
## Model 2: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 20634.54 14459.34 20700.10 20978.08 21486.10 -10069.271 NaN NaN NaN
## 2 19733.31 17803.08 19831.39 20247.24 21007.22 -9495.657 1147.228 123 0
anova(enm[[2]], enm[[3]]) # AIC prefers 3d, BIC prefers 2d
##
## Model 1: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## Model 2: mirt(data = en_all, model = 3, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 19733.31 17803.08 19831.39 20247.24 21007.22 -9495.657 NaN NaN NaN
## 2 19540.47 17702.41 19670.80 20223.39 21233.29 -9277.233 436.848 122 0
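The confirmatory model en_2pl_3f compared next assigns vocabulary, complexity, and language-use items to three correlated factors. A sketch of what such a mirt.model specification might look like; the item index ranges below are assumptions, not the actual layout:

# assumed layout: items 1-100 vocabulary, 101-112 complexity, 113-124 language use
mod_2pl_3f <- mirt.model('
  vocab   = 1-100
  COMPLEX = 101-112
  USELANG = 113-124
  COV     = vocab*COMPLEX*USELANG')
en_2pl_3f <- mirt(en_all, mod_2pl_3f, itemtype='2PL', verbose=T,
                  technical=list(NCYCLES=1000))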
anova(enm[[2]], en_2pl_3f) # BIC prefers the confirmatory 3-factor model; AIC prefers the exploratory 2-factor
##
## Model 1: mirt(data = en_all, model = mod_2pl_3f, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## Model 2: mirt(data = en_all, model = 2, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC DIC logLik logPost
## 1 20033.12 13857.92 20098.68 20376.66 20884.68 20590.06 -9768.560 -10047.031
## 2 19733.31 17803.08 19831.39 20247.24 21007.22 19733.31 -9495.657 -9495.657
## df Bayes_Factor
## 1 NaN NA
## 2 123 0
anova(enm[[3]], en_2pl_3f) # BIC prefers the confirmatory 3-factor model; AIC prefers the exploratory 3-factor
##
## Model 1: mirt(data = en_all, model = mod_2pl_3f, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## Model 2: mirt(data = en_all, model = 3, itemtype = "2PL", verbose = T,
## technical = list(NCYCLES = 1000))
## AIC AICc SABIC HQ BIC DIC logLik logPost
## 1 20033.12 13857.92 20098.68 20376.66 20884.68 20590.06 -9768.560 -10047.031
## 2 19540.47 17702.41 19670.80 20223.39 21233.29 19540.47 -9277.233 -9277.233
## df Bayes_Factor
## 1 NaN NA
## 2 245 0
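The f2loads object is not constructed in this section; a sketch of how it might be built from the exploratory 2-factor fit, assuming an item metadata table (en_item_info, a hypothetical name) that maps each definition to its category:

# sketch: rotated loadings from the 2-factor model, joined to item categories
loads <- as.data.frame(summary(enm[[2]], rotate="oblimin")$rotF)  # columns F1, F2
loads$definition <- rownames(loads)
f2loads <- left_join(loads, en_item_info, by="definition")  # adds the 'category' column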
f2loads %>% group_by(category) %>%
summarise(F1mean = mean(F1), F2mean = mean(F2),
F1sd = sd(F1), F2sd = sd(F2))
## # A tibble: 3 x 5
## category F1mean F2mean F1sd F2sd
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 COMPLEX -0.798 0.0965 0.0270 0.0618
## 2 USELANG 0.00320 0.599 0.137 0.382
## 3 vocab -0.750 0.00787 0.107 0.219
sp_1pl <- mirt(sp_voc, 1, 'Rasch', verbose=T)
sp_2pl <- mirt(sp_voc, 1, '2PL', verbose=T)
cdat <- data.frame(age_sc = scale(sp_demo$Age))
sp_1pl_age <- mirt(sp_voc, 1, 'Rasch', verbose=T,
covdata = cdat, formula = ~ age_sc)
save(sp_1pl, sp_2pl, sp_1pl_age, file="data/SP_1pl_2pl.Rdata")
load("data/SP_1pl_2pl.Rdata")
anova(sp_1pl, sp_2pl)
##
## Model 1: mirt(data = sp_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = sp_voc, model = 1, itemtype = "2PL", verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 55906.65 55951.45 56023.51 56077.45 56344.14 -27852.33 NaN NaN NaN
## 2 55765.71 55988.43 55997.11 56103.93 56632.01 -27682.85 338.944 99 0
AIC prefers the 2PL model, but BIC prefers the Rasch model.
anova(sp_1pl, sp_1pl_age) # AIC and BIC both prefer the age model
##
## Model 1: mirt(data = sp_voc, model = 1, itemtype = "Rasch", verbose = T)
## Model 2: mirt(data = sp_voc, model = 1, itemtype = "Rasch", covdata = cdat,
## formula = ~age_sc, verbose = T)
## AIC AICc SABIC HQ BIC logLik X2 df p
## 1 55906.65 55951.45 56023.51 56077.45 56344.14 -27852.33 NaN NaN NaN
## 2 55839.45 55885.23 55957.46 56011.94 56281.26 -27817.72 69.205 1 0
sp_demo <- sp_demo %>%
mutate(theta1pl = fscores(sp_1pl)[,"F1"],
theta1pl_age = fscores(sp_1pl_age)[,"F1"])
sp_demo %>% ggplot(aes(x=theta1pl, y=theta1pl_age, color=Age)) +
geom_point() + theme_bw()
Age coefficient: 2.16 (latent regression \(\beta\) = 0.54).
Our next goal is to determine whether all items should be included in the Spanish item bank; items with very bad properties should probably be dropped. We first prune any ill-fitting items (S_X2 p < .01) from the 1PL model.
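The same pruning sketch as for English applies, here to the Spanish 1PL fit:

# sketch: flag ill-fitting Spanish items with S_X2
sp_fits <- itemfit(sp_1pl)
as.character(sp_fits$item[sp_fits$p.S_X2 < .01])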
Seven items did not fit well in the full 2PL model; these items are listed below.
grúa, apestoso/a, nido, lado, dentista, nunca, disparar
Below we show item information plots for these bad items.