Acquisition of Mandarin classifiers

Of all the Mandarin data in Wordbank, only the Mandarin (Beijing) Words & Sentences (WS) form contains classifiers. We retrieve these data using wordbankr.

# Item metadata for the Mandarin (Beijing) WS form, keeping only the rows
# whose category is "classifiers".
classifiers <- filter(
  get_item_data(language = "Mandarin (Beijing)", form = "WS"),
  category == "classifiers"
)

# Responses to those classifier items, with administration-level columns
# attached and the item metadata from `classifiers` joined on.
classifier_knowledge <- get_instrument_data(
  language = "Mandarin (Beijing)",
  form = "WS",
  items = classifiers$item_id,
  administration_info = TRUE,
  item_info = classifiers
)

Model these data with a mixed-effects logistic regression: age as a fixed effect, with random intercepts for items and for children.

# Binomial GLMM of production (coerced to 0/1) on age, with a random
# intercept per classifier (item_definition) and per child (child_id).
classifier_model <- glmer(
  as.numeric(produces) ~ age +
    (1 | item_definition) + (1 | child_id),
  data = classifier_knowledge,
  family = binomial
)

Generate predicted acquisition trajectories for each item.

# Prediction grid: every classifier crossed with each integer age from 16 to
# 30 (presumably months, as in Wordbank administrations -- confirm).
classifier_preddata <- expand_grid(
  age = 16:30,
  item_definition = classifiers$item_definition
)

# Predicted production probabilities on the response scale. `re.form` keeps
# only the item random intercept, so predictions are item-specific but do not
# condition on any particular child.
classifier_predicted <- predict(
  classifier_model,
  newdata = classifier_preddata,
  re.form = ~ (1 | item_definition),
  type = "response"
)

# Attach the predictions to the grid as the `predicted` column.
classifier_preddata <- mutate(
  classifier_preddata,
  predicted = classifier_predicted
)

Plot predicted trajectories.

# Label anchor for each classifier: the youngest age in the prediction grid at
# which its predicted production probability exceeds 0.5. Classifiers that
# never exceed 0.5 within the grid get no label.
# `slice_min(age, ...)` selects that row explicitly instead of relying on the
# grid's row order, and `ungroup()` keeps a grouped tibble from leaking into
# later code.
classifier_labels <- classifier_preddata |>
  filter(predicted > 0.5) |>
  group_by(item_definition) |>
  slice_min(age, n = 1, with_ties = FALSE) |>
  ungroup()

ggplot() +
  # Optional raw-data layer (disabled):
  # geom_jitter(aes(x = age, y = as.numeric(produces), col = item_definition),
  #             data = classifier_knowledge,
  #             alpha = .2,
  #             height = .1) +
  # One curve per classifier, drawn by fitting a quasibinomial GLM to the
  # predicted probabilities so the line is smooth rather than piecewise linear.
  geom_smooth(
    aes(x = age, y = predicted, col = item_definition),
    data = classifier_preddata,
    method = "glm",
    method.args = list(family = "quasibinomial"),
    formula = y ~ x,
    se = FALSE
  ) +
  # Direct labels placed at each curve's 0.5 crossing; the legend is hidden
  # below. NOTE(review): the "wqy-microhei" font family must be registered
  # elsewhere (e.g. via showtext) for the labels to render -- confirm.
  geom_label_repel(
    aes(
      x = age,
      y = predicted,
      col = item_definition,
      label = item_definition
    ),
    data = classifier_labels,
    min.segment.length = 0,
    max.overlaps = 20,
    force = 3,
    family = "wqy-microhei"
  ) +
  labs(
    x = "Age",
    y = "Probability of production",
    col = "Classifier"
  ) +
  theme(legend.position = "none")