Compare CAT Ability Estimates and Full CDI Scores

Load the things

Data were collected from 250 participants, 47 of whom were excluded based on our preregistered criteria (mismatched demographic data, low birthweight, or developmental delays).

An additional 0 participants met our strict exclusion criterion of disagreement on at least 75% of their CAT vs. full CDI responses, but several other participants showed substantial disagreement (see histogram above).

Reshape data

# check for NA responses 
# need to filter down to the 680 words in the item_key
#item = definition if using definition,
#item = test_word if using item IDs

full_w <- resps %>% arrange(definition) %>%
  select(-response_cat) %>%
  mutate(item = definition, 
         produces = case_when(response_full == "produces" ~ 1,
                              response_full == "no_produces" ~ 0,
                              TRUE ~ NA_real_)) %>% 
  pivot_wider(id_cols = "subject_id", 
              names_from = item, 
              values_from = produces) %>%
  arrange(subject_id)

cat_w <- resps %>% #filter(response_cat!="no_test") %>%
  arrange(definition) %>%
  select(-response_full) %>%
  mutate(item = definition, 
         produces = case_when(response_cat == "produces" ~ 1,
                              response_cat == "no_produces" ~ 0,
                              TRUE ~ NA_real_)) %>%
  pivot_wider(id_cols = "subject_id", 
              names_from = item, 
              values_from = produces,
              values_fill = NA) %>%
  arrange(subject_id)

setdiff(demo$subject_id, kept_demo$subject_id)

## character(0)

demo <- demo %>% arrange(subject_id)

table(demo$sex_full)

## 
## Female   Male 
##    102    101

#table(demo$primary_education_cat)
hist(demo$primary_education_cat, xlab="Primary Caregiver's Years of Education", main="", ylab="# of Participants")

table(demo$primary_caregiver_cat)

## 
## father mother 
##     36    167

hist(demo$age_full, xlab="Age (months)", main="", ylab="# of Participants")

Examine data from full CDI

prod_s <- full_w %>% mutate(production = Reduce("+",.[2:681])) %>%
  select(subject_id, production) %>% 
  left_join(demo %>% select(subject_id, sex_full, age_full, order,
                            ), by="subject_id")

full_mat <- as.matrix(full_w %>% select(-subject_id))
cat_mat <- as.matrix(cat_w %>% select(-subject_id))

full_thetas <- data.frame(fscores(mod_2pl, method="MAP", response.pattern = full_mat)[,c("F1","SE_F1")])
cat_thetas <- data.frame(fscores(mod_2pl, method="MAP", response.pattern = cat_mat)[,c("F1","SE_F1")])

prod_s$fullTheta <- full_thetas[,1]
prod_s$fullTheta_SE <- full_thetas[,2]
prod_s$catTheta <- cat_thetas[,1]
prod_s$catTheta_SE <- cat_thetas[,2]

p1 <- prod_s %>% rename(Sex=sex_full) %>%
  ggplot(aes(x=age_full, y=production, color=Sex)) + 
  geom_point(alpha=.7) + theme_classic() + 
  xlab("Age (mos)") + ylab("Full CDI Sumscore") +
  geom_smooth(method="lm")

p2 <- prod_s %>% rename(Sex=sex_full) %>%
  ggplot(aes(x=age_full, y=fullTheta, color=Sex)) + 
  geom_point(alpha=.7) + theme_classic() + 
  geom_errorbar(aes(ymin = fullTheta-fullTheta_SE, ymax = fullTheta+fullTheta_SE), 
                alpha=.7, width=.1) +
  xlab("Age (mos)") + ylab("Ability from Full CDI") +
  geom_smooth(method="lm")

ggarrange(p1, p2, ncol=2, common.legend = T)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Full CDI vs. CAT Ability Estimates

full_vs_cat_theta <- cor(full_thetas[,1], cat_thetas[,1])

prod_vs_cat_theta <- cor(prod_s$production, cat_thetas[,1]) 

full_theta_vs_sumscore <- cor(prod_s$production, full_thetas[,1])

The correlation between participants’ ability scores from the full CDI vs. the CAT is \(r = 0.92\). The correlation between participants’ sumscores from the full CDI vs. the CAT is \(r = 0.86\).

prod_s %>% rename(Sex=sex_full) %>%
  ggplot(aes(x = fullTheta, y = catTheta, color=Sex)) + geom_point(alpha=.7) + 
    geom_errorbar(aes(ymin = catTheta-catTheta_SE, ymax = catTheta+catTheta_SE), alpha=.7) + 
    geom_errorbarh(aes(xmin = fullTheta-fullTheta_SE, xmax = fullTheta+fullTheta_SE), alpha=.7) +
  theme_classic() + xlab("Ability from full CDI") + ylab("Ability from CAT") +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  geom_smooth(method=lm)

## `geom_smooth()` using formula 'y ~ x'

#ggsave(file="fullCDI_vs_CAT_ability.pdf", width=5.2, height=4.5)

Mostly on-target, although some overestimates from the CAT. Are these mostly from participants with large disagreement between CAT and full CDI items?

CAT Overestimation

## Joining, by = "subject_id"

## Joining, by = c("subject_id", "n")

(red = CAT-first; blue = CDI:WS-first)

The largest errors tended to come from participants who responded “knows” to a large proportion of the CAT items, and to some extent these are also the participants who disagree with themselves on the CAT vs. full CDI. Are these real responses (and indicative of some bias that parents have to want to indicate some minimum number of items that their child does know), or simply bad participants who are quickly swiping “yes”? Could look at RTs of the participants making a large proportion of ‘knows’ responses to see if they are very fast. Could also check in the real-data CAT simulations what the overall proportion ‘knows’ is supposed to be (presumably ~50%..): is it unreasonably low? If the CAT generally selects items the child doesn’t know (as it might, if the child is younger..), perhaps parents adjust their threshold and are more likely to say ‘knows’ (which in turn generates more difficult items..). In short, maybe we want to front-load the CAT with ~5 more easy age-based items, before getting into the actual CAT.