# Item metadata for the Mandarin (Beijing) WS form, restricted to the items
# whose category is "classifiers".
classifiers <- get_item_data(
  language = "Mandarin (Beijing)",
  form = "WS"
) |>
  filter(category == "classifiers")

# Instrument data for those classifier items only. Administration info is
# requested and the item metadata above is passed in as `item_info`, so the
# result can be modelled by age, item and child further down.
classifier_knowledge <- get_instrument_data(
  language = "Mandarin (Beijing)",
  form = "WS",
  items = classifiers$item_id,
  administration_info = TRUE,
  item_info = classifiers
)

# Acquisition of Mandarin classifiers
Of all the Mandarin data in Wordbank, only the Mandarin (Beijing) WS form contains classifiers. We retrieve these data using the wordbankr package.
Model these data with a mixed-effects logistic regression: a fixed effect of age, plus random intercepts for each classifier and each child.
# Binomial mixed-effects model of production as a function of age, with
# random intercepts for item (`item_definition`) and child (`child_id`).
# `produces` is coerced to numeric so it can serve as the response.
classifier_model <- glmer(
  as.numeric(produces) ~ age + (1 | item_definition) + (1 | child_id),
  data = classifier_knowledge,
  family = binomial
)

# Generate predicted acquisition trajectories for each item.
# Prediction grid: every age from 16 to 30 crossed with every classifier.
classifier_preddata <- expand_grid(
  age = 16:30,
  item_definition = classifiers$item_definition
)

# `re.form` keeps only the item random intercept, so the grid needs no
# `child_id` column and each prediction is item-specific rather than
# child-specific. `type = "response"` returns probabilities.
classifier_predicted <- predict(
  classifier_model,
  newdata = classifier_preddata,
  re.form = ~ (1 | item_definition),
  type = "response"
)

# Attach the predicted probabilities to the grid they were computed for.
classifier_preddata <- classifier_preddata |>
  mutate(predicted = classifier_predicted)

# Plot predicted trajectories.
# One label per classifier, anchored at the youngest age at which its
# predicted probability of production exceeds 0.5. The youngest age is chosen
# explicitly with `slice_min()` instead of relying on the row order of the
# prediction grid. Classifiers that never exceed 0.5 within the grid get no
# label.
classifier_labels <- classifier_preddata |>
  filter(predicted > 0.5) |>
  group_by(item_definition) |>
  slice_min(age, n = 1, with_ties = FALSE) |>
  ungroup()

ggplot() +
  # Raw responses, kept for reference but currently disabled:
  # geom_jitter(aes(x = age, y = as.numeric(produces), col = item_definition),
  #             data = classifier_knowledge,
  #             alpha = .2,
  #             height = .1) +
  # A per-classifier logistic curve fitted through the model predictions.
  geom_smooth(
    aes(x = age, y = predicted, col = item_definition),
    data = classifier_preddata,
    method = "glm",
    method.args = list(family = "quasibinomial"),
    formula = y ~ x,
    se = FALSE
  ) +
  labs(
    x = "Age",
    y = "Probability of production",
    col = "Classifier"
  ) +
  # Labels replace the legend, which is hidden below.
  # NOTE(review): the "wqy-microhei" font family presumably has to be
  # registered before plotting — confirm.
  geom_label_repel(
    aes(
      x = age,
      y = predicted,
      col = item_definition,
      label = item_definition
    ),
    data = classifier_labels,
    min.segment.length = 0,
    max.overlaps = 20,
    force = 3,
    family = "wqy-microhei"
  ) +
  theme(legend.position = "none")