This document reports some preliminary findings on 4-year-olds’ understanding of generic knowledge as common ground. Here is the preregistration. The data are all valid PANDA cases, although interference coding has not yet been completed.
In brief, participants’ behavior was correctly predicted in the warmup, test, and control phases. However, stronger evidence is needed for the predicted directional effect in the test phase. The results of each phase are detailed, below.
Session packages and seed.
library(brms) # Models
library(ggtext) # Plots
library(janitor) # Cleaning
library(knitr) # Reporting
library(parallel) # Parallelization
library(patchwork) # Plots
library(readxl) # Data import
library(tidybayes) # Analysis
library(tidyverse) # Wrangling
seed <- 10003 # RNG seed
# Import Qualtrics data
gkcg_qualtrics0 <- read_csv("data/kids_qualtrics_2025_dec16.csv")
gkcg_qualtrics <- gkcg_qualtrics0 %>%
row_to_names(1) %>% # first row to column names
clean_names()
# Import PANDA data
gkcg_panda0 <- read_csv("data/kids_panda_2025_dec17.csv")
gkcg_panda <- gkcg_panda0 %>%
clean_names() %>%
mutate(participation_date = as.Date(recorded_on), # date of participation
age_num = as.numeric(participation_date - birthday) / 365.25) # exact age in years
# Remove invalid PANDA cases
gkcg_panda_nopilot <- gkcg_panda %>%
filter(participation_date >= "2025-12-12") # Remove experimenter and pilot observations
gkcg_panda_valid <- gkcg_panda_nopilot %>%
filter(is_valid_case == "Yes") %>% # Only valid participants
arrange(recorded_on) %>% # arrange participation records by date/time
mutate(id_new = row_number()) # id's organized by date/time of participation
sum(gkcg_panda_nopilot$is_valid_case == "No") # Count of invalid cases from non-pilot data
id_new <- setNames(gkcg_panda_valid$id_new, gkcg_panda_valid$id)
# Create dataset
gkcg_data0 <- gkcg_qualtrics %>%
# Include only valid PANDA cases
filter(id %in% gkcg_panda_valid$id) %>%
# Include only these columns
select(
id, # participant id
starts_with("wrm"), # warmup trials
starts_with("generic"), # test trials
starts_with("specific"), # test trials
starts_with("c_")) %>% # control trials
# Remove timing columns
select(-c(ends_with("_start"), ends_with("_end"))) %>%
# Long data
pivot_longer(cols = !id, names_to = "variable", values_to = "response") %>%
# Exclude non-selected items
filter(response == "On") %>%
# Join with covariates from PANDA data (by PANDA ID)
left_join(gkcg_panda_valid %>%
select(id, gender, age_num), by = "id") %>%
# Split variable and response
separate(variable, into = c("variable", "response"), sep = "_(?=[^_]+$)") %>%
mutate(
# Relevel gender
gender = case_when(gender == "M" ~ "boy",
gender == "F" ~ "girl"),
# Remove leading "_q" in string
variable = str_remove(variable, "_q"),
# Create depicted gender
depicted_gender = case_when(
str_ends(variable, "boy") ~ NA_character_,
str_ends(variable, "girl") ~ NA_character_,
str_ends(variable, "_b") ~ "depicted_boy",
str_ends(variable, "_g") ~ "depicted_girl",
T ~ "0"),
# Create trial type
trial_type = case_when(
str_detect(variable, "wrm") ~ "warmup",
str_detect(variable, "generic") ~ "test",
str_detect(variable, "specific") ~ "test",
str_detect(variable, "c_") ~ "control",
T ~ NA_character_),
# Create trial subtype
trial_subtype = case_when(
# Warmup attention
str_detect(variable, "boy") ~ "boy_correct",
str_detect(variable, "girl") ~ "girl_correct",
# Warmup 3p
str_detect(variable, "wrm_3pp") ~ "3pp",
str_detect(variable, "wrm_3ps") ~ "3ps",
# Test
str_detect(variable, "generic") ~ "generic",
str_detect(variable, "specific") ~ "specific",
# Control
str_detect(variable, "num_phrs_match") ~ "number_or_phrasal_matching",
str_detect(variable, "cnjgtn_match") ~ "conjugation_matching",
T ~ NA_character_),
# Create condition
condition = ifelse(trial_subtype %in% c("generic", "specific"), paste0(trial_subtype), NA_character_),
# Create item
item = case_when(
str_detect(variable, "ride") ~ "ride",
str_detect(variable, "catch") ~ "catch",
str_detect(variable, "play") ~ "play",
str_detect(variable, "train") ~ "train",
str_detect(variable, "wash") ~ "wash",
str_detect(variable, "rake") ~ "rake",
str_detect(variable, "demo") ~ "demonstrative_pronoun",
str_detect(variable, "personal") ~ "personal_pronoun",
T ~ NA_character_),
# Correct (warmup trials)
response_correct = case_when(trial_subtype == "3pp" & response == "plural" ~ "Correct",
trial_subtype == "3ps" & response == "singular" ~ "Correct",
trial_subtype == "boy_correct" & response == "boy" ~ "Correct",
trial_subtype == "girl_correct" & response == "girl" ~ "Correct",
trial_type %in% c("test", "control") ~ NA_character_,
T ~ "Incorrect"),
across(where(is.character), ~ str_to_lower(.)), # convert characters to lowercase
across(where(is.character), ~ as.factor(.)), # convert characters to factors
id = id_new[id]) %>%
# Group by id, trial type
group_by(id, trial_type) %>%
# Count trial number
mutate(trial = row_number()) %>%
ungroup() %>%
# Remove "variable" column
select(-variable)
# Remove participants with zero test trials
trials <- gkcg_data0 %>% count(id, trial_subtype) %>% # count of each trial_subtype by participant
ungroup() %>%
complete(id,
trial_subtype,
fill = list(n = 0)) %>%
pivot_wider(id_cols = id,
names_from = trial_subtype,
values_from = n) %>%
filter(generic == 0 & specific == 0) # only participant 33 contributed no trials
# Participants included
gkcg_inclusions <- gkcg_data0 %>%
filter(!id %in% trials$id, # remove X participants who contributed no trials
id <= 50 + length(trials$id)) %>% # include X many more participants with id's over 50
distinct(id)
# Standardize age
gkcg_data_age_standardized0 <- gkcg_data0 %>%
filter(id %in% gkcg_inclusions$id) %>%
distinct(id, age_num, gender) %>%
mutate(age_standardized = as.numeric(scale(age_num))) # standardize age
gkcg_data <- gkcg_data0 %>%
filter(id %in% gkcg_inclusions$id) %>%
left_join(gkcg_data_age_standardized0 %>% select(-c(age_num, gender)), by = "id")
n_distinct(gkcg_data$id) # 50 participants
There were 568 observations from 50 participants (mean age: 4.56 years, range: 4.03 years to 4.99 years).
Of the 568 observations, 190 were warmup trials, 285 were test trials, and 93 were control trials.
Of the 50 participants, 29 were girls (mean age: 4.57 years, range: 4.09 years to 4.99 years) and 21 were boys (mean age: 4.54 years, range: 4.03 years to 4.99 years).
The trials completed, data, predictive accuracy, model, analysis, and posterior distributions are presented for the warmup and test phases of the experiment. The data from the control phase is displayed at the end.
Summary: Participants responded correctly at high rates in the warmup phase. The weakest evidence for systematic, correct responding was found for participants’ comprehension of the third-person plural in its personal (they) and demonstrative (those ones) forms. Nonetheless, correct response rates were high in these trials. (NB: Those ones is, really, a third-person plural construction, unlike they, but both are referred to as third-person plural pronouns due to their common function in this experiment.)
Participant counts are inset.
# statistics
data_distribution_warmup <- gkcg_data %>% # number of participants who completed X trials, per condition
filter(trial_type == "warmup") %>%
mutate(warmup_trial_type = case_when(trial_subtype %in% c("3pp", "3ps") ~ "Comprehension",
trial_subtype %in% c("boy_correct", "girl_correct") ~ "Attention")) %>%
count(name = "n_trials", id, warmup_trial_type) %>%
complete(id, warmup_trial_type, fill = list(n_trials = 0)) %>%
count(name = "n_participants", warmup_trial_type, n_trials) %>%
ungroup() %>%
group_by(warmup_trial_type) %>%
mutate(prop = n_participants / sum(n_participants),
n_trials = as.factor(n_trials),
vjust_text = ifelse(n_participants <= 2, -0.3, 1.3))
# figure
base_size <- 17
ggplot(data_distribution_warmup, aes(x = n_trials, y = prop, fill = "grey80")) +
facet_grid(~ warmup_trial_type, scales = "free") +
geom_bar(stat = "identity") +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_text(aes(label = n_participants, vjust = vjust_text),
color = "black",
family = "Times New Roman",
size = 5) +
scale_y_continuous(
labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(0, .5)
) +
labs(
x = "Trials Completed",
y = "Proportion of Participants",
title = "Warmup Trials Completed"
) +
scale_fill_identity() +
theme_classic(base_size = base_size, base_family = "Times New Roman") +
theme(
axis.text.x = element_text(size = base_size - 1),
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_markdown(size = base_size - 1, hjust = 0.5)
)
ggsave("figs/warmup_trials.png", height = 4, width = 6)
Participant counts are inset.
descriptives <- function(trial_type_level){
gkcg_data %>%
filter(trial_type == {{ trial_type_level }}) %>% # Include only this trial type
count(trial_subtype, response) %>% # Count observations per cell
group_by(trial_subtype) %>% # Set grouping
mutate(prop = n / sum(n)) %>% # Compute proportion
ungroup() # Remove grouping
}
descriptives("warmup") # compute descriptives for warmup trials
# main figure
complete_boy <- descriptives("warmup") %>%
filter(trial_subtype %in% c("boy_correct", "girl_correct")) %>%
droplevels() %>%
complete(trial_subtype, response, fill = list(n = 0, prop = 0))
descriptives_warmup <- descriptives("warmup") %>%
filter(!trial_subtype %in% c("boy_correct", "girl_correct")) %>%
rbind(complete_boy) %>%
mutate(
trial_subtype = factor(case_when(
str_detect(trial_subtype, "girl") ~ "Girl",
str_detect(trial_subtype, "boy") ~ "Boy",
str_detect(trial_subtype, "3pp") ~ "Plural",
str_detect(trial_subtype, "3ps") ~ "Singular",
TRUE ~ NA_character_
), levels = c("Girl", "Boy", "Plural", "Singular")),
response = str_to_title(response),
response = factor(response, levels = c("Girl", "Boy", "Plural", "Singular")),
vjust_text = ifelse(n <= 4, -0.3, 1.3),
bar_fill = ifelse(trial_subtype == response, "grey50", "grey70"))
ggplot(descriptives_warmup, aes(x = response, y = prop, fill = bar_fill)) +
facet_grid(~ trial_subtype, scales = "free") +
geom_bar(stat = "identity") +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_text(aes(label = n, vjust = vjust_text),
color = "black",
family = "Times New Roman",
size = 5) +
scale_y_continuous(
labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(0, .5, 1)
) +
labs(
x = "Response",
y = "Proportion of Participants",
subtitle = "Prediction: <span style='color:grey50'>Correct</span> > <span style='color:grey70'>Incorrect</span>",
title = "Warmup Phase Data"
) +
scale_fill_identity() +
theme_classic(base_size = base_size, base_family = "Times New Roman") +
theme(
axis.text.x = element_text(size = base_size - 1),
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_markdown(size = base_size - 1, hjust = 0.5)
)
ggsave("figs/warmup_main_data.png", height = 4, width = 8)
Here is the distribution of responses across plural and singular third-person demonstrative and personal pronouns. Participant counts are inset.
# Comprehension figure
comprehension_data <- gkcg_data %>%
filter(item %in% c("demonstrative_pronoun",
"personal_pronoun")) %>%
count(trial_subtype, item, response) %>%
ungroup() %>%
group_by(trial_subtype, item) %>%
mutate(response = str_to_title(response),
levels_for_graph = case_when(trial_subtype == "3pp" & str_detect(item, "dem") ~ "Plural Demonstrative",
trial_subtype == "3pp" & str_detect(item, "per") ~ "Plural Personal",
trial_subtype == "3ps" & str_detect(item, "dem") ~ "Singular Demonstrative",
trial_subtype == "3ps" & str_detect(item, "per") ~ "Singular Personal",
T ~ NA),
prop = n / sum(n),
vjust_text = ifelse(n == 0, -0.3, 1.3),
bar_fill = case_when(str_extract(levels_for_graph, "Plural") == response ~ "grey50",
str_extract(levels_for_graph, "Singular") == response ~ "grey50",
T ~ "grey70")) %>%
ungroup()
ggplot(comprehension_data, aes(x = response, y = prop, fill = bar_fill)) +
facet_grid(~ levels_for_graph, scales = "free") +
geom_bar(stat = "identity") +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_text(aes(label = n, vjust = vjust_text),
color = "black",
family = "Times New Roman",
size = 5) +
scale_y_continuous(
labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(0, .5, 1)
) +
labs(
x = "Response",
y = "Proportion of Participants",
subtitle = "Prediction: <span style='color:grey50'>Correct</span> > <span style='color:grey70'>Incorrect</span>",
title = "Warmup Pronoun Comprehension Data"
) +
scale_fill_identity() +
theme_classic(base_size = base_size, base_family = "Times New Roman") +
theme(
axis.text.x = element_text(size = base_size - 1),
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_markdown(size = base_size - 1, hjust = 0.5)
)
ggsave("figs/warmup_comprehension_data.png", height = 4, width = 8)
predictive_accuracy_warmup <- gkcg_data %>% # How often were responses predicted compared to unpredicted?
filter(trial_type == "warmup") %>%
mutate(prediction = case_when(trial_subtype == "girl_correct" & response == "girl" ~ "accurate",
trial_subtype == "boy_correct" & response == "boy" ~ "accurate",
trial_subtype == "3pp" & response == "plural" ~ "accurate",
trial_subtype == "3ps" & response == "singular" ~ "accurate",
T ~ "inaccurate")) %>%
count(name = "count", prediction) %>%
mutate(odds_of_accurate_prediction = count[prediction == "accurate"] / count[prediction == "inaccurate"])
Of the 190 warmup trial observations, 161 were predicted accurately (e.g., participants chose a dyad in the plural comprehension trial or an individual in the singular trial). In contrast, 29 observations were predicted inaccurately. Thus, our post-hoc odds of accurately predicting participants’ response in each warmup trial was 5.55.
Models of the warmup observations.
# Priors
priors <- c(prior(normal(0, 1.00), Intercept),
prior(normal(0, 0.8), b),
prior(normal(0, 0.3), sd))
# Response family
response_family <- "bernoulli"
# Formula
warmup_formula <- brmsformula(response_correct ~ trial_subtype + (trial_subtype | id))
warmup_formula_compreheneion <- brmsformula(response_correct ~ trial_subtype + item + (trial_subtype + item | id))
warmup_data_to_be_modeled <- gkcg_data %>%
drop_na(response_correct) %>%
mutate(trial_subtype = fct_drop(trial_subtype),
response_correct = fct_relevel(response_correct, "incorrect", "correct"))
mod_warmup <- brm(formula = warmup_formula,
data = warmup_data_to_be_modeled,
family = response_family,
chains = detectCores() - 1,
cores = detectCores() - 1,
prior = priors,
refresh = 0,
backend = "cmdstanr")
mod_warmup_comprehension <- brm(formula = warmup_formula_compreheneion,
data = warmup_data_to_be_modeled %>% filter(!is.na(item)),
family = response_family,
chains = detectCores() - 1,
cores = detectCores() - 1,
prior = priors,
refresh = 0,
backend = "cmdstanr")
Analysis of the posterior models of the warmup phase observations.
# Main model
newdata_warmup <- tibble(trial_subtype = levels(mod_warmup$data$trial_subtype))
epreds <- local({
set.seed(10003)
epreds <- epred_draws(mod_warmup, newdata_warmup, re_formula = NA)
}) %>%
ungroup() %>%
mutate(trial_subtype = case_when(trial_subtype == "3pp" ~ "Plural",
trial_subtype == "3ps" ~ "Singular",
trial_subtype == "girl_correct" ~ "Girl",
trial_subtype == "boy_correct" ~ "Boy",
T ~ NA_character_),
trial_subtype = factor(trial_subtype, levels = c("Plural", "Singular", "Boy", "Girl")))
epreds_summarized_warmup <- epreds %>%
group_by(trial_subtype) %>%
summarize(
min = min(.epred),
hdi_lo = hdi(.epred)[1],
mean = mean(.epred),
hdi_hi = hdi(.epred)[2],
max = max(.epred),
prop_gt = mean(.epred > .5),
prop_gt_char = sprintf("%.3f", prop_gt) %>%
sub("^0", "", .)
)
# Comprehension model
newdata_warmup_c <- crossing(trial_subtype = levels(mod_warmup_comprehension$data$trial_subtype),
item = levels(mod_warmup_comprehension$data$item))
epreds_c <- local({
set.seed(10003)
epreds <- epred_draws(mod_warmup_comprehension, newdata_warmup_c, re_formula = NA)
}) %>%
ungroup() %>%
mutate(trial_subtype = case_when(trial_subtype == "3pp" ~ "Plural",
trial_subtype == "3ps" ~ "Singular",
trial_subtype == "girl_correct" ~ "Girl",
trial_subtype == "boy_correct" ~ "Boy",
T ~ NA_character_),
item = ifelse(item == "demonstrative_pronoun", "Demonstrative", "Personal"),
trial_subtype = factor(trial_subtype, levels = c("Plural", "Singular", "Boy", "Girl")))
epreds_summarized_warmup_c <- epreds_c %>%
group_by(trial_subtype, item) %>%
summarize(
min = min(.epred),
hdi_lo = hdi(.epred)[1],
mean = mean(.epred),
hdi_hi = hdi(.epred)[2],
max = max(.epred),
prop_gt = mean(.epred > .5),
prop_gt_char = sprintf("%.3f", prop_gt) %>%
sub("^0", "", .)
)
Posterior distributions of the mean rate of choosing correctly.
ggplot(epreds,
aes(x = .epred,
y = trial_subtype)) +
stat_halfeye(aes(fill = after_stat(x < 0.5)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black") +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = epreds_summarized_warmup,
aes(x = hdi_hi + .02,
vjust = -2.25,
label = prop_gt_char),
size = 4.5,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
geom_vline(xintercept = .5,
alpha = .3,
linetype = 2) +
labs(x = "Predicted Mean",
title = "Warmup Phase Results",
subtitle = "Posterior Rate of Correct Responding",
y = "Trial Type") +
scale_x_continuous(labels = function(x) sub("^0\\.", ".", sprintf("%.2f", x)),
limits = c(0.41, 1),
breaks = c(.5, .75, 1.00)) +
scale_y_discrete(expand = expansion(mult = c(0.05, NA))) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.text.y = element_text(vjust = -1.5),
axis.ticks.y = element_blank(),
plot.subtitle = element_text(hjust = .5, size = base_size - 3),
plot.title = element_text(hjust = .5, face = "bold")) +
annotate("text",
x = .44,
y = 4.5,
label = "More\noften\nincorrect",
color = "grey70",
family = "Times New Roman",
fontface = "bold",
size = 4.5) +
annotate("text",
x = .55,
y = 4.5,
label = "More\noften\ncorrect",
color = "grey50",
family = "Times New Roman",
fontface = "bold",
size = 4.5)
ggsave("figs/warmup_results.png", height = 4, width = 6)
ggplot(epreds_c,
aes(x = .epred,
y = item)) +
facet_grid(trial_subtype ~ .) +
stat_halfeye(aes(fill = after_stat(x < 0.5)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black") +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = epreds_summarized_warmup_c,
aes(x = hdi_hi + .02,
vjust = -2.25,
label = prop_gt_char),
size = 4.5,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
geom_vline(xintercept = .5,
alpha = .3,
linetype = 2) +
labs(x = "Predicted Mean",
title = "Warmup Comprehension Results",
subtitle = "Posterior Rate of Correct Responding",
y = "Third-Person Pronoun") +
scale_x_continuous(labels = function(x) sub("^0\\.", ".", sprintf("%.2f", x)),
limits = c(0.41, 1),
breaks = c(.5, .75, 1.00)) +
scale_y_discrete(expand = expansion(mult = c(0.07, NA))) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.text.y = element_text(vjust = -1.5),
axis.ticks.y = element_blank(),
plot.subtitle = element_text(hjust = .5, size = base_size - 3),
plot.title = element_text(hjust = .5, face = "bold")) +
geom_text(
data = data.frame(
trial_subtype = "Plural",
x = c(.45, .54),
y = c(2.6, 2.6),
label = c("More\noften\nincorrect",
"More\noften\ncorrect"),
color = c("grey70", "grey50")
),
aes(x = x, y = y, label = label, color = color),
inherit.aes = FALSE,
family = "Times New Roman",
fontface = "bold",
size = 4,
show.legend = FALSE
)
ggsave("figs/warmup_comprehension_results.png", height = 4, width = 6)
Summary: Our directional predictions were qualitatively accurate. Moreover, participants made generic interpretations of they at rates numerically above chance and specific interpretations of this one at rates numerically below chance. However, the evidence for these directional effects was not especially strong.
Surprisingly, the estimated effect size and locations of the means were nearly exactly those that I had simulated in my early sample size planning analyses for this experiment. Based on those analyses, we need a larger sample (~100 participants) to be confident in obtaining strong evidence for the predicted directional effect. Because the value we use for inference (the posterior odds) is mathematically equivalent to a Bayes factor for the directional effect under our prior configuration, and Bayes factors are robust to optional stopping (unlike p-values), we can simply resume sampling children until we obtain the desired level of evidence for our predicted directional effect.
Here is the distribution of the number of test trials completed per trial type. Participant counts are inset.
# statistics
data_distribution <- gkcg_data %>% # number of participants who completed X trials, per condition
drop_na(condition) %>%
count(name = "n_trials", id, condition) %>%
ungroup() %>%
count(name = "n_participants", condition, n_trials) %>%
ungroup() %>%
group_by(condition) %>%
mutate(prop = n_participants / sum(n_participants),
condition = paste0(str_to_title(condition), " Condition"),
vjust_text = ifelse(n_participants <= 2, -0.3, 1.3))
# figure
ggplot(data_distribution, aes(x = n_trials, y = prop, fill = "grey80")) +
facet_grid(~ condition, scales = "free") +
geom_bar(stat = "identity") +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_text(aes(label = n_participants, vjust = vjust_text),
color = "black",
family = "Times New Roman",
size = 5) +
scale_y_continuous(
labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(0, .5, 1)
) +
labs(
x = "Trials Completed",
y = "Proportion of Participants",
title = "Test Trials Completed"
) +
scale_fill_identity() +
theme_classic(base_size = base_size, base_family = "Times New Roman") +
theme(
axis.text.x = element_text(size = base_size - 1),
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_markdown(size = base_size - 1, hjust = 0.5)
)
ggsave("figs/test_trials.png", height = 4, width = 8)
Test phase observations.
# Compute individuals' outcome distribution
individual_dat <- gkcg_data %>%
drop_na(condition) %>%
group_by(id) %>%
count(condition, response, age_standardized) %>%
ungroup()
# By participant
individual_dat %>%
complete(id, condition, response, fill = list(n = 0)) %>%
group_by(id, condition) %>%
mutate(
sum = sum(n),
prop = ifelse(sum == 0, 0, n / sum)
) %>%
ungroup() %>%
mutate(response_profile = case_when(condition == "generic" & response == "zarpie" & prop > .5 ~ "predicted",
condition == "specific" & response == "flurp" & prop > .5 ~ "predicted",
T ~ "unpredicted")) %>%
filter(condition == "generic" & response == "zarpie" |
condition == "specific" & response == "flurp") %>%
count(response_profile)
# Descriptives
descriptives("test") # compute descriptives for test trials
# figure
pos <- position_jitter(width = 0.1, height = 0.05, seed = 10003)
begin_value <- 0.15
end_value <- 0.85
test_results0 <- individual_dat %>%
group_by(id) %>%
complete(condition, response, fill = list(n = 0)) %>%
ungroup() %>%
group_by(id, condition) %>%
mutate(sum = sum(n),
prop = round(n / sum(n), 2),
condition = str_to_title(condition)) %>%
ungroup() %>%
filter(response == "zarpie")
test_results1 <- test_results0 %>%
pivot_wider(id_cols = id,
names_from = condition,
values_from = prop) %>%
mutate(more_zarpie_responses_in = factor(case_when(
Generic > Specific ~ "Generic\nCondition",
Generic == Specific ~ "Equal",
Generic < Specific ~ "Specific\nCondition",
T ~ NA),
levels = c("Generic\nCondition", "Equal", "Specific\nCondition")))
test_results <- test_results0 %>%
left_join(test_results1 %>% select(id, more_zarpie_responses_in), by = "id")
p1 <- ggplot(test_results %>%
drop_na(more_zarpie_responses_in), aes(x = condition, y = prop)) +
geom_line(aes(group = id,
color = factor(more_zarpie_responses_in)),
linewidth = 0.8,
alpha = .5,
position = pos) +
geom_jitter(aes(color = factor(more_zarpie_responses_in)),
size = 2.5,
position = pos) +
stat_summary(fun = mean,
geom = "point",
shape = 21,
size = 3,
fill = "yellow",
alpha = 1,
color = "darkgreen",
stroke = 1.2) +
stat_summary(fun = mean,
geom = "text",
aes(label = sub("^0\\.", ".",
sprintf("%.2f", after_stat(y))),
hjust = ifelse(condition == "Generic", 1.4, -.4)),
color = "black",
family = "Times New Roman",
size = 5) +
scale_color_viridis_d(begin = begin_value, end = end_value) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
scale_y_continuous(limits = c(-0.05, 1.05),
breaks = c(0, .33, .66, 1),
labels = function(x) sub("^0\\.", ".", sprintf("%.2f", x))) +
geom_hline(yintercept = 0.5, linetype = 2) +
labs(x = "Condition",
y = "Proportion Choosing Zarpie",
subtitle = "Prediction: Generic > Specific",
title = "Test Phase Data") +
theme(plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = base_size - 1, hjust = .5),
axis.text = element_text(size = base_size - 1),
legend.position = "none")
nrows <- nrow(test_results1)
p2 <- ggplot(test_results1 %>%
drop_na(more_zarpie_responses_in),
aes(x = more_zarpie_responses_in, fill = more_zarpie_responses_in)) +
geom_bar(stat = "count") +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
labs(x = "More Zarpie Responses",
y = "Number of Participants",
subtitle = "Prediction: Generic > Specific",
title = "Zarpie Response Profiles") +
scale_fill_viridis_d(begin = begin_value, end = end_value) +
scale_y_continuous(breaks = c(0, 10, 20)) +
theme(plot.title = element_text(hjust = .5,
face = "bold"),
plot.subtitle = element_text(size = base_size - 1,
hjust = .5),
axis.text = element_text(size = base_size - 1),
legend.position = "none")
p1 + p2
ggsave("figs/test_data.png", height = 4, width = 9)
predictive_accuracy <- individual_dat %>% # How often were responses predicted compared to unpredicted?
mutate(predicted = case_when(condition == "generic" & response == "zarpie" ~ 1,
condition == "specific" & response == "flurp" ~ 1,
T ~ -1),
predicted_scaled = predicted * n) %>%
group_by(predicted) %>%
summarize(count = sum(predicted_scaled), .groups = "drop") %>% # how often did kids do predicted relative to unpredicted stuff?
mutate(predicted = ifelse(predicted == 1, "predicted_response", "unpredicted_response"),
odds_of_predicted_response =
abs(
count[predicted == "predicted_response"] /
count[predicted == "unpredicted_response"]
)
)
Of the 285 test trial observations, 152 were predicted accurately (i.e., participants chose Zarpies in the generic condition and Flurps in the specific condition). In contrast, 133 observations were predicted inaccurately. Thus, our post-hoc odds of accurately predicting participants’ response in each test trial was 1.14.
Models of the test phase observations.
# Model formula
formyouluh <- brmsformula(response ~
condition +
(condition | id) + (1 | item))
# Dataset to be modeled
test_data_to_be_modeled <- gkcg_data %>%
drop_na(condition) %>%
mutate(response = fct_relevel(response, "flurp", "zarpie"))
# Model
mod_test <- brm(formula = formyouluh,
data = test_data_to_be_modeled,
family = response_family,
chains = detectCores() - 1,
cores = detectCores() - 1,
prior = priors,
refresh = 0,
backend = "cmdstanr")
cat_dat <- test_results1 %>%
drop_na(more_zarpie_responses_in) %>%
select(more_zarpie_responses_in)
mod_test_cat <- brm(formula = more_zarpie_responses_in ~ 1,
data = cat_dat,
family = categorical,
chains = detectCores() - 1,
cores = detectCores() - 1,
prior = c(prior(normal(0, 1.00), Intercept, dpar = muEqual),
prior(normal(0, 1.00), Intercept, dpar = muSpecificCondition)),
refresh = 0,
backend = "cmdstanr")
Analysis of the posterior models of the test phase observations.
# quick checks
# conditional_effects(mod_test, effects = "condition")
hypothesis(mod_test, "conditionspecific > 0")
hypothesis(mod_test_cat, "muSpecificCondition_Intercept > 0")
# epreds
newdata <- crossing(
condition = unique(levels(mod_test$data$condition)))
# depicted_gender = unique(levels(mod_test$data$depicted_gender)),
# gender = unique(levels(mod_test$data$gender)),
# age_standardized = seq(from = min(mod_test$data$age_standardized),
# to = max(mod_test$data$age_standardized),
# length.out = 20))
epreds <- local(
{
set.seed(10003)
epred_draws(mod_test, newdata, re_formula = NA)
}
) %>%
ungroup()
func_summarize <- function(df, quantity, prop_gt_val) {
df %>%
summarize(
min = min({{quantity}}),
hdi_lo = hdi({{quantity}})[1],
mean = mean({{quantity}}),
hdi_hi = hdi({{quantity}})[2],
max = max({{quantity}}),
prop_gt = mean({{quantity}} > prop_gt_val),
prop_gt_char = sprintf("%.3f", prop_gt) %>%
sub("^0", "", .)
) %>%
print()
}
func_epreds <- function(variable){
# Capture variable name as string
var_name <- rlang::as_name(rlang::enquo(variable))
# Marginalize predictions
epreds_marginalized <- epreds %>%
group_by(.draw, {{variable}}) %>%
summarize(mean_epred = mean(.epred), .groups = "drop") %>%
mutate(
!!var_name := str_to_title(as.character({{variable}})) # dynamic rename
)
# Summarize marginal predictions
epreds_marginalized_summary <- epreds_marginalized %>%
group_by(!!sym(var_name)) %>% # use dynamic column
func_summarize(mean_epred, 0.5)
# Compute contrasts
epreds_marginalized_contrasts <- epreds_marginalized %>%
compare_levels(mean_epred, by = var_name) %>% # dynamic column name
ungroup()
epreds_marginalized_contrasts_summary <- epreds_marginalized_contrasts %>%
func_summarize(mean_epred, 0)
# Return list
return(list(
epreds_marginalized = epreds_marginalized,
epreds_marginalized_summary = epreds_marginalized_summary,
epreds_marginalized_contrasts = epreds_marginalized_contrasts,
epreds_marginalized_contrasts_summary = epreds_marginalized_contrasts_summary
))
}
# results_gender <- func_epreds(gender) # gender results
# results_depgender <- func_epreds(depicted_gender) # depicted gender results
results_condition <- func_epreds(condition) # condition results
# epreds_slope <- epreds %>%
# group_by(.draw) %>%
# summarize(age_slope = cov(age_standardized, .epred)/var(age_standardized),
# .groups = "drop")
#
# results_age <- epreds_slope %>%
# func_summarize(age_slope, 0) %>%
# ungroup()
# categorical model
newdata_cat <- crossing() # use crossing, not tibble, for intercepts-only predictions (epred_draws requires 1 row)
epreds_cat <- local({
set.seed(10003)
epreds <- epred_draws(mod_test_cat, newdata_cat)
}) %>%
ungroup()
epreds_cat_summary <- epreds_cat %>%
group_by(.category) %>%
summarize(
min = min(.epred),
hdi_lo = hdi(.epred)[1],
mean = mean(.epred),
hdi_hi = hdi(.epred)[2],
max = max(.epred),
prop_gt = mean(.epred > .333),
prop_gt_char = sprintf("%.3f", prop_gt) %>%
sub("^0", "", .)
) %>%
ungroup()
epreds_cat_contrasts <- epreds_cat %>%
compare_levels(.epred, by = ".category") %>%
ungroup()
epreds_cat_contrasts_summary <- epreds_cat_contrasts %>%
group_by(.category) %>%
summarize(
min = min(.epred),
hdi_lo = hdi(.epred)[1],
mean = mean(.epred),
hdi_hi = hdi(.epred)[2],
max = max(.epred),
prop_gt = mean(.epred > 0),
prop_gt_char = sprintf("%.3f", prop_gt) %>%
sub("^0", "", .))
Posterior distributions of the mean rate of choosing Zarpies (qua generic interpretation). X-axes display the probability scale. The scale for the effect size is absolute value.
These group-level results suggest that children chose Zarpies at lower rates in the specific condition than in the generic condition, as predicted. However, the evidence for this ought to be stronger.
p3 <- ggplot(results_condition$epreds_marginalized,
aes(x = mean_epred,
y = condition)) +
stat_halfeye(aes(fill = after_stat(x < 0.5)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black") +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = results_condition$epreds_marginalized_summary,
aes(x = hdi_hi + .03,
vjust = -3.5,
label = prop_gt_char),
size = 4.5,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
geom_vline(xintercept = .5,
alpha = .3,
linetype = 2) +
labs(x = "Predicted Mean",
subtitle = "Posterior Predicted Means",
y = NULL) +
scale_x_continuous(labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(.3, .5, .7)) +
scale_y_discrete(expand = expansion(mult = c(0.05, .85))) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.text.y = element_text(vjust = -3.5),
axis.ticks.y = element_blank(),
plot.subtitle = element_text(hjust = .5)) +
annotate("text",
x = .31,
y = 2.94,
label = "Flurps > Zarpies",
hjust = .25,
color = "grey70",
family = "Times New Roman",
fontface = "bold",
size = 4) +
annotate("text",
x = .64,
y = 2.94,
label = "Zarpies > Flurps",
hjust = .5,
color = "grey50",
family = "Times New Roman",
fontface = "bold",
size = 4)
p4 <- ggplot(results_condition$epreds_marginalized_contrasts,
aes(x = mean_epred)) +
stat_halfeye(aes(fill = after_stat(x < 0)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black") +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = results_condition$epreds_marginalized_contrasts_summary,
aes(x = hdi_hi + .03,
label = prop_gt_char),
size = 4.5,
y = .3,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
geom_vline(xintercept = 0,
alpha = .3,
linetype = 2) +
labs(x = "Effect Size",
subtitle = "Posterior Effect Size",
y = "Density") +
scale_x_continuous(breaks = c(-.3, 0, .3),
expand = expansion(mult = c(.1, .2)),
labels = function(x) {
x_abs <- abs(x) # absolute
sprintf("%.1f", x_abs) %>%
sub("^0\\.", ".", .)}) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.subtitle = element_text(hjust = .5)) +
annotate("text",
x = -.2,
y = 1,
label = "Generic > Specific",
hjust = .45,
color = "grey70",
family = "Times New Roman",
fontface = "bold",
size = 4) +
annotate("text",
x = .21,
y = 1,
label = "Specific > Generic",
hjust = .5,
color = "grey50",
family = "Times New Roman",
fontface = "bold",
size = 4)
p3 + p4 +
plot_annotation(title = "Test Phase Results") &
theme(plot.title = element_text(hjust = .5, face = "bold", family = "Times New Roman", size = base_size + 2))
ggsave("figs/test_results.png", height = 4, width = 8)
The participant-level results, below (left), suggest that participants chose Zarpies more often in the generic condition than in the specific condition at rates above chance. In contrast, participants chose Zarpies more often in the specific condition, and at equal rates in both conditions, at rates below chance.
The results on the right suggest that participants were more likely to choose Zarpies more often in the generic condition than in the specific condition than the opposite (i.e., generic > specific vs specific > generic; top distribution) and compared to choosing Zarpies at equal rates in both condition (i.e., generic > equal; bottom distribution). Moreover, participants were similarly likely to choose Zarpies at rates similar in both condition compared to choosing Zarpies at higher rates in the specific than in the generic condition (i.e., equal vs. specific; middle distribution).
While all of these directional effects follow our predictions, the evidence for them ought to be stronger.
p_cat_means <- ggplot(epreds_cat %>% mutate(.category = ifelse(.category == "Equal", "Equal\n", paste0(.category))),
aes(x = .epred,
y = .category)) +
stat_halfeye(aes(fill = after_stat(x < 0.333)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black") +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = epreds_cat_summary %>% mutate(.category = ifelse(.category == "Equal", "Equal\n", paste0(.category))),
aes(x = hdi_hi + .03,
vjust = -3.5,
label = prop_gt_char),
size = 4.5,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
geom_vline(xintercept = .333,
alpha = .3,
linetype = 2) +
labs(x = "Predicted Probability",
subtitle = "Posterior Predicted Probability",
y = NULL) +
scale_x_continuous(labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(.1, .33, .6)) +
scale_y_discrete(expand = expansion(mult = c(0.05, .6))) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.text.y = element_text(vjust = -.7, hjust = .5),
axis.ticks.y = element_blank(),
plot.subtitle = element_text(hjust = .5)) +
geom_text(
data = data.frame(
x = c(.2, .5),
y = c(4.05, 4.05),
label = c("Below Chance",
"Above Chance"),
color = c("grey70", "grey50")
),
aes(x = x, y = y, label = label, color = color),
inherit.aes = FALSE,
family = "Times New Roman",
fontface = "bold",
size = 4,
show.legend = FALSE
)
p_cat_contrasts <- ggplot(epreds_cat_contrasts,
aes(x = .epred,
y = .category)) +
stat_halfeye(aes(fill = after_stat(x < 0)),
.width = 0.95,
slab_alpha = 0.6,
slab_linewidth = 0.5,
slab_color = "black",
height = .74) +
scale_fill_manual(values = c("TRUE" = "grey70",
"FALSE" = "grey30"),
guide = "none") +
geom_text(data = epreds_cat_contrasts_summary,
aes(x = hdi_hi + .03,
label = prop_gt_char),
size = 4.5,
vjust = -3,
family = "Times New Roman",
fontface = "bold",
color = "grey40") +
scale_y_discrete(expand = expansion(mult = c(0.05, .47))) +
geom_vline(xintercept = 0,
alpha = .3,
linetype = 2) +
labs(x = "Effect Size",
subtitle = "More Common In The:",
y = "Density") +
scale_x_continuous(breaks = c(-.3, 0, .3),
labels = function(x) {
x_abs <- abs(x) # absolute value
sprintf("%.1f", x_abs) %>%
sub("^0\\.", ".", .)}) +
theme_classic(base_size = base_size,
base_family = "Times New Roman") +
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
plot.subtitle = element_text(hjust = .5)) +
geom_text(
data = tibble(
y = c(rep("Equal - Generic\nCondition", 2),
rep("Specific\nCondition - Generic\nCondition", 2),
rep("Specific\nCondition - Equal", 2)),
x = rep(c(-.3, .3), 3),
label = c("Generic Condition", "Equal",
"Generic Condition", "Specific Condition",
"Equal", "Specific Condition"),
color = rep(c("grey70", "grey50"), 3)
),
aes(x = x, y = y, label = label, color = color),
inherit.aes = FALSE,
family = "Times New Roman",
vjust = -7.9,
fontface = "bold",
size = 4,
show.legend = FALSE
)
p_cat_means + p_cat_contrasts +
plot_annotation(title = "Zarpie Response Profiles") &
theme(plot.title = element_text(hjust = .5, face = "bold", family = "Times New Roman", size = base_size + 2))
ggsave("figs/test_results_cat.png", height = 4, width = 8)
Here’s the control phase data.
descriptives("control") # compute descriptives for control trials
# figure
descriptives("control") %>%
mutate(
trial_subtype = factor(case_when(trial_subtype == "conjugation_matching" ~ "Conjugation",
trial_subtype == "number_or_phrasal_matching" ~ "Number or Phrase",
T ~ NA_character_)),
response = str_to_title(response),
vjust_text = ifelse(n == 5, -0.3, 1.3)) %>%
ggplot(aes(x = response, y = prop)) +
facet_grid(~ trial_subtype, scales = "free") +
geom_bar(stat = "identity", fill = "grey") +
geom_hline(yintercept = 0.5, linetype = 2) +
scale_y_continuous(labels = function(x) sub("^0\\.", ".", sprintf("%.1f", x)),
breaks = c(0, .5, 1),
limits = c(0, 1)) +
geom_text(aes(label = n, vjust = vjust_text),
color = "black",
family = "Times New Roman",
size = 5) +
labs(x = "Response",
y = "Proportion of Participants",
subtitle = "Prediction: Flurp = Zarpie",
title = "Control Trials") +
theme_classic(base_size = base_size, base_family = "Times New Roman") +
theme(plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = base_size - 1, hjust = .5),
axis.text = element_text(size = base_size - 1))
ggsave("figs/data_control.png", height = 4, width = 6)
print(sessionInfo())
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] rstan_2.35.0.9000 StanHeaders_2.35.0.9000 lubridate_1.9.4
## [4] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [7] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [10] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
## [13] tidybayes_3.0.7 readxl_1.4.3 patchwork_1.3.0
## [16] knitr_1.49 janitor_2.2.1 ggtext_0.1.2
## [19] brms_2.22.9 Rcpp_1.0.14
##
## loaded via a namespace (and not attached):
## [1] gridExtra_2.3 inline_0.3.21 rlang_1.1.5
## [4] magrittr_2.0.3 snakecase_0.11.1 matrixStats_1.5.0
## [7] compiler_4.4.2 loo_2.8.0.9000 systemfonts_1.2.1
## [10] vctrs_0.6.5 pkgconfig_2.0.3 arrayhelpers_1.1-0
## [13] crayon_1.5.3 fastmap_1.2.0 backports_1.5.0
## [16] labeling_0.4.3 utf8_1.2.4 cmdstanr_0.8.1
## [19] rmarkdown_2.29 markdown_1.13 tzdb_0.4.0
## [22] ps_1.8.1 ragg_1.3.3 bit_4.5.0.1
## [25] xfun_0.50 cachem_1.1.0 jsonlite_1.8.9
## [28] R6_2.5.1 bslib_0.9.0 stringi_1.8.4
## [31] jquerylib_0.1.4 cellranger_1.1.0 estimability_1.5.1
## [34] bayesplot_1.11.1.9000 Matrix_1.7-2 timechange_0.3.0
## [37] tidyselect_1.2.1 rstudioapi_0.17.1 abind_1.4-8
## [40] yaml_2.3.10 codetools_0.2-20 curl_6.2.0
## [43] processx_3.8.5 pkgbuild_1.4.6 lattice_0.22-6
## [46] withr_3.0.2 bridgesampling_1.1-2 posterior_1.6.0
## [49] coda_0.19-4.1 evaluate_1.0.3 RcppParallel_5.1.10
## [52] ggdist_3.3.2 xml2_1.3.6 pillar_1.10.1
## [55] tensorA_0.36.2.1 checkmate_2.3.2 stats4_4.4.2
## [58] distributional_0.5.0 generics_0.1.3 vroom_1.6.5
## [61] hms_1.1.3 rstantools_2.4.0.9000 munsell_0.5.1
## [64] commonmark_1.9.2 scales_1.3.0 xtable_1.8-4
## [67] glue_1.8.0 emmeans_1.10.7 tools_4.4.2
## [70] data.table_1.16.4 mvtnorm_1.3-3 grid_4.4.2
## [73] QuickJSR_1.5.1 colorspace_2.1-1 nlme_3.1-167
## [76] cli_3.6.3 textshaping_1.0.0 svUnit_1.0.6
## [79] viridisLite_0.4.2 Brobdingnag_1.2-9 V8_8.0.1
## [82] gtable_0.3.6 sass_0.4.9 digest_0.6.37
## [85] farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4
## [88] gridtext_0.1.5 bit64_4.6.0-1