This script fits the models and produces the figures reported in the
Results section of the hybrid-drawing paper. Models test whether each
hybrid drawing contains recognizable content from its two constituent
base concepts, using CLIP-based classification ranks against a set of 14
base categories.
Data
data_raw <- read.csv('../data_intermediates/analysis/prolific_k_way_classification_all_children_anonymized.csv',
stringsAsFactors = FALSE)
# coerce string booleans to logical
str_bool_cols <- c("is_adult", "concept1_available", "concept2_available",
"constituent1_in_top_1", "constituent2_in_top_1",
"constituent1_in_top_5", "constituent2_in_top_5",
"both_constituents_in_top_1", "both_constituents_in_top_5")
data_raw[str_bool_cols] <- lapply(data_raw[str_bool_cols],
function(x) as.logical(as.character(x)))
cat("rows in raw dataset:", nrow(data_raw), "\n")
## rows in raw dataset: 2938
cat("unique participants:", length(unique(data_raw$participant_id)), "\n")
## unique participants: 739
cat("hybrid categories:", length(unique(data_raw$hybrid_category)), "\n")
## hybrid categories: 8
cat("is_adult breakdown in raw dataset:\n")
## is_adult breakdown in raw dataset:
print(table(data_raw$is_adult, useNA = "always"))
##
## FALSE TRUE <NA>
## 2723 215 0
# child analyses use ages 3-10; adults are handled separately
data <- data_raw %>%
filter(is_adult == FALSE | is.na(is_adult)) %>%
filter(age_numeric >= 3)
cat("after excluding adults and under-3s:", nrow(data), "drawings\n")
## after excluding adults and under-3s: 2513 drawings
cat("sessions:", length(unique(data$participant_id)), "\n")
## sessions: 613
cat("age range:", range(data$age_numeric), "\n")
## age range: 3 10
cat("mean age:", round(mean(data$age_numeric), 2),
"SD:", round(sd(data$age_numeric), 2), "\n")
## mean age: 5.82 SD: 2.06
print(table(data$age_numeric))
##
## 3 4 5 6 7 8 9 10
## 338 451 415 484 321 166 135 203
# center age; coerce boolean columns
data$age_centered <- scale(data$age_numeric, center = TRUE, scale = FALSE)[, 1]
# restrict to drawings whose constituents both have a base-category centroid
data <- data %>%
filter(concept1_available == TRUE & concept2_available == TRUE)
cat("after filtering for available centroids:", nrow(data), "drawings\n")
## after filtering for available centroids: 2191 drawings
cat("sessions:", length(unique(data$participant_id)), "\n")
## sessions: 600
# exact chance under a uniformly random permutation of 14 base categories
n_concepts <- 14
individual_chance <- 5 / n_concepts # single constituent in top 5
both_chance <- (5 * 4) / (n_concepts * (n_concepts - 1)) # both specific constituents in top 5
cat("single-constituent top-5 chance:", round(individual_chance, 3), "\n")
## single-constituent top-5 chance: 0.357
cat("joint top-5 chance:", round(both_chance, 3), "\n")
## joint top-5 chance: 0.11
data$individual_chance <- individual_chance
data$both_chance <- both_chance
Descriptive
statistics
overall <- data %>%
summarise(
n = n(),
prop_c1_top5 = mean(constituent1_in_top_5),
prop_c2_top5 = mean(constituent2_in_top_5),
prop_both_top5 = mean(both_constituents_in_top_5)
)
print(overall)
## n prop_c1_top5 prop_c2_top5 prop_both_top5
## 1 2191 0.587403 0.4997718 0.2409859
per_age <- data %>%
group_by(age_numeric) %>%
summarise(
n = n(),
prop_c1_top5 = round(mean(constituent1_in_top_5), 3),
prop_c2_top5 = round(mean(constituent2_in_top_5), 3),
prop_both_top5 = round(mean(both_constituents_in_top_5), 3),
.groups = 'drop'
)
print(per_age)
## # A tibble: 8 × 5
## age_numeric n prop_c1_top5 prop_c2_top5 prop_both_top5
## <dbl> <int> <dbl> <dbl> <dbl>
## 1 3 292 0.462 0.497 0.216
## 2 4 397 0.501 0.516 0.207
## 3 5 360 0.614 0.464 0.233
## 4 6 417 0.638 0.484 0.249
## 5 7 279 0.663 0.48 0.254
## 6 8 147 0.673 0.463 0.218
## 7 9 120 0.642 0.508 0.283
## 8 10 179 0.587 0.631 0.324
per_category <- data %>%
group_by(hybrid_category) %>%
summarise(
n = n(),
prop_c1_top5 = round(mean(constituent1_in_top_5), 3),
prop_c2_top5 = round(mean(constituent2_in_top_5), 3),
prop_both_top5 = round(mean(both_constituents_in_top_5), 3),
.groups = 'drop'
) %>%
arrange(desc(prop_both_top5))
print(per_category)
## # A tibble: 7 × 5
## hybrid_category n prop_c1_top5 prop_c2_top5 prop_both_top5
## <chr> <int> <dbl> <dbl> <dbl>
## 1 a tiger frog 269 0.651 0.636 0.45
## 2 a mushroom house 349 0.691 0.47 0.404
## 3 a bike bee 318 0.566 0.553 0.299
## 4 a sheep fish 303 0.759 0.465 0.254
## 5 a train cat 343 0.531 0.551 0.181
## 6 an elephant snail 296 0.476 0.483 0.095
## 7 a rabbit boat 313 0.441 0.355 0.013
Primary models
# constituent 1 above chance + age effect
model1 <- glmer(constituent1_in_top_5 ~ age_centered +
offset(qlogis(individual_chance)) +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## constituent1_in_top_5 ~ age_centered + offset(qlogis(individual_chance)) +
## (age_centered | hybrid_category) + (1 | participant_id)
## Data: data
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 2869.8 2904.0 -1428.9 2857.8 2185
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1293 -0.9862 0.5860 0.8020 1.5150
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.156013 0.39498
## hybrid_category (Intercept) 0.221825 0.47098
## age_centered 0.001329 0.03645 -0.50
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.96628 0.18480 5.229 1.71e-07 ***
## age_centered 0.11150 0.02805 3.975 7.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd -0.226
# constituent 2 above chance + age effect
model2 <- glmer(constituent2_in_top_5 ~ age_centered +
offset(qlogis(individual_chance)) +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(model2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## constituent2_in_top_5 ~ age_centered + offset(qlogis(individual_chance)) +
## (age_centered | hybrid_category) + (1 | participant_id)
## Data: data
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 3003.0 3037.2 -1495.5 2991.0 2185
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6950 -0.9434 -0.6961 0.9933 1.4147
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.021861 0.14786
## hybrid_category (Intercept) 0.101631 0.31880
## age_centered 0.005331 0.07302 0.83
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.59691 0.12838 4.649 3.33e-06 ***
## age_centered 0.04104 0.03508 1.170 0.242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd 0.616
# formal test of the constituent x age interaction
# two rows per drawing, one per constituent
data_long <- data %>%
select(drawing_file, participant_id, age_centered, hybrid_category,
individual_chance,
constituent1_in_top_5, constituent2_in_top_5) %>%
pivot_longer(
cols = c(constituent1_in_top_5, constituent2_in_top_5),
names_to = "constituent",
values_to = "in_top_5"
) %>%
mutate(constituent = factor(constituent,
levels = c("constituent1_in_top_5",
"constituent2_in_top_5"),
labels = c("first", "second")))
combined_model <- glmer(in_top_5 ~ age_centered * constituent +
offset(qlogis(individual_chance)) +
(age_centered | hybrid_category) +
(1 | participant_id) +
(1 | drawing_file),
data = data_long, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(combined_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## in_top_5 ~ age_centered * constituent + offset(qlogis(individual_chance)) +
## (age_centered | hybrid_category) + (1 | participant_id) +
## (1 | drawing_file)
## Data: data_long
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 5917.1 5974.6 -2949.5 5899.1 4373
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0245 -1.0434 0.6745 0.8874 1.4044
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## drawing_file (Intercept) 0.000000 0.00000
## participant_id (Intercept) 0.000000 0.00000
## hybrid_category (Intercept) 0.095888 0.30966
## age_centered 0.002279 0.04774 -0.04
## Number of obs: 4382, groups:
## drawing_file, 2191; participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.95786 0.12517 7.652 1.97e-14 ***
## age_centered 0.11051 0.02841 3.890 0.0001 ***
## constituentsecond -0.36669 0.06189 -5.925 3.12e-09 ***
## age_centered:constituentsecond -0.07124 0.03030 -2.351 0.0187 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ag_cnt cnsttn
## age_centerd -0.010
## cnsttntscnd -0.253 -0.022
## ag_cntrd:cn -0.010 -0.554 0.017
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# joint recognition: both constituents in top 5
both_in_top5 <- glmer(both_constituents_in_top_5 ~ age_centered +
offset(qlogis(both_chance)) +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(both_in_top5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## both_constituents_in_top_5 ~ age_centered + offset(qlogis(both_chance)) +
## (age_centered | hybrid_category) + (1 | participant_id)
## Data: data
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 2154.4 2188.5 -1071.2 2142.4 2185
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1045 -0.6184 -0.3607 -0.0830 10.9464
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.12531 0.3540
## hybrid_category (Intercept) 1.74568 1.3212
## age_centered 0.01173 0.1083 -0.85
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5253 0.5074 1.035 0.3006
## age_centered 0.1262 0.0534 2.363 0.0181 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd -0.675
# analysis (c): per-pair BLUPs from the joint model
joint_blups <- ranef(both_in_top5)$hybrid_category %>%
as_tibble(rownames = "hybrid_category") %>%
rename(intercept_dev = `(Intercept)`,
age_slope_dev = age_centered) %>%
mutate(
intercept_total = intercept_dev + fixef(both_in_top5)["(Intercept)"],
age_slope_total = age_slope_dev + fixef(both_in_top5)["age_centered"]
) %>%
arrange(desc(intercept_total))
print(joint_blups)
## # A tibble: 7 × 5
## hybrid_category intercept_dev age_slope_dev intercept_total age_slope_total
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 a tiger frog 1.34 -0.0606 1.87 0.0656
## 2 a mushroom house 1.17 -0.105 1.70 0.0208
## 3 a bike bee 0.704 -0.0744 1.23 0.0518
## 4 a sheep fish 0.496 -0.0822 1.02 0.0439
## 5 a train cat -0.0133 0.0488 0.512 0.175
## 6 an elephant snail -0.787 0.0911 -0.262 0.217
## 7 a rabbit boat -2.59 0.159 -2.06 0.285
Robustness and
exploratory analyses
# robustness: z-scored rank as continuous outcome for constituent 1
rank_model1 <- lmer(scale(concept1_rank) ~ age_centered +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data)
summary(rank_model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## scale(concept1_rank) ~ age_centered + (age_centered | hybrid_category) +
## (1 | participant_id)
## Data: data
##
## REML criterion at convergence: 6013.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0454 -0.8004 -0.1998 0.8099 2.3462
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.0564222 0.23753
## hybrid_category (Intercept) 0.0844257 0.29056
## age_centered 0.0006883 0.02624 -1.00
## Residual 0.8475031 0.92060
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.01795 0.11213 6.11735 0.160 0.877976
## age_centered -0.07136 0.01473 10.11918 -4.845 0.000654 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd -0.658
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# robustness: z-scored rank as continuous outcome for constituent 2
rank_model2 <- lmer(scale(concept2_rank) ~ age_centered +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data)
summary(rank_model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## scale(concept2_rank) ~ age_centered + (age_centered | hybrid_category) +
## (1 | participant_id)
## Data: data
##
## REML criterion at convergence: 6077.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.86941 -0.91005 0.03783 0.81875 2.11901
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.009920 0.09960
## hybrid_category (Intercept) 0.083219 0.28848
## age_centered 0.001599 0.03999 0.20
## Residual 0.911840 0.95490
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.01056 0.11104 6.01439 -0.095 0.927
## age_centered -0.02582 0.01822 6.33809 -1.417 0.204
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd 0.162
# analysis (b): continuous-rank robustness check for joint recognition
# max of the two z-scored ranks per drawing; lower = both constituents better recognized
data <- data %>%
mutate(
concept1_rank_z = as.numeric(scale(concept1_rank)),
concept2_rank_z = as.numeric(scale(concept2_rank)),
max_rank_z = pmax(concept1_rank_z, concept2_rank_z)
)
joint_rank_model <- lmer(max_rank_z ~ age_centered +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data)
summary(joint_rank_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: max_rank_z ~ age_centered + (age_centered | hybrid_category) +
## (1 | participant_id)
## Data: data
##
## REML criterion at convergence: 4868.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2513 -0.7342 0.0210 0.6588 2.7644
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.0135836 0.11655
## hybrid_category (Intercept) 0.2096745 0.45790
## age_centered 0.0007266 0.02696 0.18
## Residual 0.5157427 0.71815
## Number of obs: 2191, groups: participant_id, 600; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.68802 0.17384 6.01251 3.958 0.00744 **
## age_centered -0.05723 0.01290 6.46340 -4.437 0.00367 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd 0.145
# exploratory: joint model restricted to ages 6 and up
data_6plus <- data %>%
filter(age_numeric >= 6) %>%
mutate(age_centered = scale(age_numeric, center = TRUE, scale = FALSE)[, 1])
cat("6+ sample:", nrow(data_6plus), "drawings,",
length(unique(data_6plus$participant_id)), "sessions\n")
## 6+ sample: 1142 drawings, 288 sessions
both_6plus <- glmer(both_constituents_in_top_5 ~ age_centered +
offset(qlogis(both_chance)) +
(age_centered | hybrid_category) +
(1 | participant_id),
data = data_6plus, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(both_6plus)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## both_constituents_in_top_5 ~ age_centered + offset(qlogis(both_chance)) +
## (age_centered | hybrid_category) + (1 | participant_id)
## Data: data_6plus
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 1203.4 1233.6 -595.7 1191.4 1136
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0374 -0.6450 -0.4547 0.9769 7.5452
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## participant_id (Intercept) 0.06994 0.2645
## hybrid_category (Intercept) 1.60821 1.2682
## age_centered 0.02092 0.1447 -1.00
## Number of obs: 1142, groups: participant_id, 288; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7157 0.4932 1.451 0.1467
## age_centered 0.1535 0.0819 1.874 0.0609 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age_centerd -0.702
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Adult benchmark
(analysis d)
# analysis (d): adult benchmark on the 215 adult drawings (~46 sessions)
# same outcomes and offsets as children; random slope dropped because
# age does not vary meaningfully within adults
data_adult <- data_raw %>%
filter(is_adult == TRUE,
concept1_available == TRUE & concept2_available == TRUE)
cat("adult sample:", nrow(data_adult), "drawings,",
length(unique(data_adult$participant_id)), "sessions\n")
## adult sample: 190 drawings, 46 sessions
# attach chance columns and per-pair / overall proportions
data_adult$individual_chance <- individual_chance
data_adult$both_chance <- both_chance
adult_overall <- data_adult %>%
summarise(
n = n(),
prop_c1_top5 = mean(constituent1_in_top_5),
prop_c2_top5 = mean(constituent2_in_top_5),
prop_both_top5 = mean(both_constituents_in_top_5)
)
print(adult_overall)
## n prop_c1_top5 prop_c2_top5 prop_both_top5
## 1 190 0.5947368 0.5789474 0.3157895
adult_by_category <- data_adult %>%
group_by(hybrid_category) %>%
summarise(
n = n(),
prop_c1_top5 = round(mean(constituent1_in_top_5), 3),
prop_c2_top5 = round(mean(constituent2_in_top_5), 3),
prop_both_top5 = round(mean(both_constituents_in_top_5), 3),
.groups = "drop"
) %>%
arrange(desc(prop_both_top5))
print(adult_by_category)
## # A tibble: 7 × 5
## hybrid_category n prop_c1_top5 prop_c2_top5 prop_both_top5
## <chr> <int> <dbl> <dbl> <dbl>
## 1 a tiger frog 25 0.72 0.68 0.52
## 2 a bike bee 26 0.423 0.808 0.423
## 3 a sheep fish 25 0.8 0.56 0.36
## 4 a train cat 26 0.654 0.654 0.346
## 5 a mushroom house 33 0.515 0.364 0.333
## 6 an elephant snail 27 0.37 0.741 0.185
## 7 a rabbit boat 28 0.714 0.321 0.071
# constituent 1
adult_model1 <- glmer(constituent1_in_top_5 ~ 1 +
offset(qlogis(individual_chance)) +
(1 | hybrid_category) +
(1 | participant_id),
data = data_adult, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(adult_model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: constituent1_in_top_5 ~ 1 + offset(qlogis(individual_chance)) +
## (1 | hybrid_category) + (1 | participant_id)
## Data: data_adult
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 258.2 268.0 -126.1 252.2 187
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6043 -0.9807 0.6233 0.7588 1.0944
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.000 0.0000
## hybrid_category (Intercept) 0.263 0.5129
## Number of obs: 190, groups: participant_id, 46; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0083 0.2477 4.071 4.68e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# constituent 2
adult_model2 <- glmer(constituent2_in_top_5 ~ 1 +
offset(qlogis(individual_chance)) +
(1 | hybrid_category) +
(1 | participant_id),
data = data_adult, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(adult_model2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: constituent2_in_top_5 ~ 1 + offset(qlogis(individual_chance)) +
## (1 | hybrid_category) + (1 | participant_id)
## Data: data_adult
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 255.3 265.0 -124.6 249.3 187
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6212 -0.8449 0.5801 0.7332 1.3006
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.1014 0.3184
## hybrid_category (Intercept) 0.4265 0.6531
## Number of obs: 190, groups: participant_id, 46; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.9795 0.2985 3.282 0.00103 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# joint
adult_both <- glmer(both_constituents_in_top_5 ~ 1 +
offset(qlogis(both_chance)) +
(1 | hybrid_category) +
(1 | participant_id),
data = data_adult, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(adult_both)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: both_constituents_in_top_5 ~ 1 + offset(qlogis(both_chance)) +
## (1 | hybrid_category) + (1 | participant_id)
## Data: data_adult
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 239.0 248.8 -116.5 233.0 187
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.9202 -0.6968 -0.5391 1.1491 2.1728
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.08406 0.2899
## hybrid_category (Intercept) 0.31628 0.5624
## Number of obs: 190, groups: participant_id, 46; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2539 0.2815 4.455 8.39e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# constituent asymmetry in adults: long format with constituent factor
data_adult_long <- data_adult %>%
select(drawing_file, participant_id, hybrid_category, individual_chance,
constituent1_in_top_5, constituent2_in_top_5) %>%
pivot_longer(
cols = c(constituent1_in_top_5, constituent2_in_top_5),
names_to = "constituent",
values_to = "in_top_5"
) %>%
mutate(constituent = factor(constituent,
levels = c("constituent1_in_top_5",
"constituent2_in_top_5"),
labels = c("first", "second")))
adult_combined <- glmer(in_top_5 ~ constituent +
offset(qlogis(individual_chance)) +
(1 | hybrid_category) +
(1 | participant_id) +
(1 | drawing_file),
data = data_adult_long, family = binomial,
control = glmerControl(optimizer = "bobyqa"))
summary(adult_combined)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: in_top_5 ~ constituent + offset(qlogis(individual_chance)) +
## (1 | hybrid_category) + (1 | participant_id) + (1 | drawing_file)
## Data: data_adult_long
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 523.4 543.1 -256.7 513.4 375
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3529 -1.1400 0.7530 0.8257 0.9899
##
## Random effects:
## Groups Name Variance Std.Dev.
## drawing_file (Intercept) 1.110e-14 1.054e-07
## participant_id (Intercept) 1.321e-14 1.149e-07
## hybrid_category (Intercept) 6.536e-02 2.557e-01
## Number of obs: 380, groups:
## drawing_file, 190; participant_id, 46; hybrid_category, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.99311 0.17835 5.568 2.57e-08 ***
## constituentsecond -0.06619 0.21006 -0.315 0.753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cnsttntscnd -0.593
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Figures
clean_category <- function(x) {
x %>%
str_remove("^an? ") %>%
str_replace_all(" ", "-")
}
# per-pair, per-age proportions for both constituents
panel_a_data <- data %>%
mutate(hybrid_category = clean_category(hybrid_category)) %>%
select(participant_id, age_numeric, hybrid_category,
constituent1_in_top_5, constituent2_in_top_5) %>%
pivot_longer(
cols = c(constituent1_in_top_5, constituent2_in_top_5),
names_to = "constituent",
values_to = "top_5"
) %>%
mutate(constituent = factor(constituent,
levels = c("constituent1_in_top_5",
"constituent2_in_top_5"),
labels = c("Constituent 1", "Constituent 2"))) %>%
group_by(age_numeric, hybrid_category, constituent) %>%
summarise(
prop_top_5 = mean(top_5),
se = sd(top_5) / sqrt(n()),
n_drawings = n(),
.groups = "drop"
)
fig_constituents <- ggplot(panel_a_data,
aes(x = age_numeric, y = prop_top_5, color = constituent)) +
geom_hline(yintercept = individual_chance, linetype = "dashed", color = "grey50") +
geom_point(aes(size = n_drawings), alpha = 0.8) +
geom_linerange(aes(ymin = prop_top_5 - se, ymax = prop_top_5 + se), alpha = 0.3) +
geom_smooth(method = "lm", aes(fill = constituent), alpha = 0.2, se = TRUE) +
facet_wrap(~hybrid_category, nrow = 1) +
scale_color_manual(values = c("Constituent 1" = "#E74C3C",
"Constituent 2" = "#3498DB")) +
scale_fill_manual(values = c("Constituent 1" = "#E74C3C",
"Constituent 2" = "#3498DB")) +
scale_size_continuous(range = c(0.5, 2)) +
ylim(0, 1) +
labs(x = "Age (years)", y = "Proportion with constituent in top 5",
color = "Constituent", fill = "Constituent",
size = "Number of\ndrawings") +
theme_few() +
theme(legend.position = "right", aspect.ratio = 1,
strip.text = element_text(size = 8),
axis.title = element_text(size = 9),
axis.text = element_text(size = 9),
legend.title = element_text(size = 8),
legend.text = element_text(size = 7))
fig_constituents

# ggsave("../writing/fig_constituent_age.svg", fig_constituents, width = 10, height = 4)
# per-category and overall age trajectories for joint recognition
category_data <- data %>%
mutate(hybrid_category = clean_category(hybrid_category)) %>%
group_by(age_numeric, hybrid_category) %>%
summarise(prop_both_top_5 = mean(both_constituents_in_top_5), .groups = "drop")
collapsed_data <- data %>%
group_by(age_numeric) %>%
summarise(
prop_both_top_5 = mean(both_constituents_in_top_5),
se = sd(both_constituents_in_top_5) / sqrt(n()),
n_drawings = n(),
.groups = "drop"
)
x_limits <- range(collapsed_data$age_numeric)
fig_joint <- ggplot() +
geom_hline(yintercept = both_chance, linetype = "dashed", color = "grey50") +
geom_smooth(data = category_data,
aes(x = age_numeric, y = prop_both_top_5,
color = hybrid_category, fill = hybrid_category),
method = "lm", se = TRUE, alpha = 0.1, linewidth = 0.7) +
geom_smooth(data = collapsed_data,
aes(x = age_numeric, y = prop_both_top_5),
method = "lm", color = "black", fill = "grey30",
alpha = 0.3, linewidth = 1.2, se = TRUE) +
geom_point(data = collapsed_data,
aes(x = age_numeric, y = prop_both_top_5, size = n_drawings),
alpha = 0.8) +
scale_size_continuous(range = c(2, 5)) +
scale_color_viridis_d(name = "Hybrid category") +
scale_fill_viridis_d(name = "Hybrid category") +
scale_x_continuous(limits = x_limits, expand = c(0.01, 0.01)) +
ylim(0, 1) +
labs(x = "Age (years)", y = "Proportion with both constituents in top 5",
size = "Number of\ndrawings") +
theme_few() +
theme(legend.position = "right",
axis.title = element_text(size = 12),
axis.text = element_text(size = 11)) +
guides(fill = guide_legend(), color = guide_legend())
fig_joint

# ggsave("../writing/fig_joint_age.svg", fig_joint, width = 7, height = 5)
fig_ranks <- data %>%
mutate(hybrid_category_clean = clean_category(hybrid_category)) %>%
ggplot(aes(x = concept1_rank, y = concept2_rank, color = hybrid_category_clean)) +
geom_point(alpha = 0.6, size = 2) +
geom_hline(yintercept = 5, color = "black", linewidth = 0.5) +
geom_vline(xintercept = 5, color = "black", linewidth = 0.5) +
facet_wrap(~hybrid_category_clean, nrow = 1) +
coord_fixed(ratio = 1) +
scale_color_viridis_d() +
labs(x = "Constituent 1 classification rank",
y = "Constituent 2 classification rank") +
theme_few() +
theme(legend.position = "none",
strip.text = element_text(size = 8),
axis.title = element_text(size = 9),
axis.text = element_text(size = 9))
fig_ranks

# ggsave("../writing/fig_rank_scatter.svg", fig_ranks, width = 10, height = 3)