Preprocessing

Load data

Import recognition data from each run of recoggames: animalgame, vehiclegame, biganimalgame, and objectgame

#
# Read the animalgame recognition trials, tag every row with the experiment
# name, and drop the `X` column (presumably a row index written out with the
# CSV -- confirm against the export script).
animal_game <- read.csv("recognition_data/animalgame.csv") %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is its replacement
  mutate(exp = 'animalgame') %>%
  select(-X)
## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
#
# Read the vehiclegame recognition trials, tag every row with the experiment
# name, and drop the `X` column (presumably a row index written out with the
# CSV -- confirm against the export script).
vehicle_game <- read.csv("recognition_data/vehiclegame.csv") %>%
  as_tibble() %>% # as.tibble() is deprecated; as_tibble() is its replacement
  mutate(exp = 'vehiclegame') %>%
  select(-X)
#
# Read the biganimalgame and objectgame recognition trials. Each table is
# tagged with its experiment name and loses the `X` column (presumably a row
# index written out with the CSV -- confirm against the export script).
# as_tibble() replaces the deprecated as.tibble().
biganimal_game <- read.csv("recognition_data/biganimalgame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'biganimalgame') %>%
  select(-X)

object_game <- read.csv("recognition_data/objectgame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'objectgame') %>%
  select(-X)

# Combine the four experiment runs into one table by folding full_join() over
# them in order. No `by` is supplied, so each join matches on every column the
# two inputs share.
recog_data <- Reduce(
  full_join,
  list(animal_game, vehicle_game, biganimal_game, object_game)
)
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## Warning: Column `sessionId` joining factors with different levels, coercing
## to character vector
## Warning: Column `sketch_path` joining factors with different levels,
## coercing to character vector
## Warning: Column `intended_category` joining factors with different levels,
## coercing to character vector
## Warning: Column `clicked_category` joining factors with different levels,
## coercing to character vector
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## Warning: Column `sessionId` joining character vector and factor, coercing
## into character vector
## Warning: Column `sketch_path` joining character vector and factor, coercing
## into character vector
## Warning: Column `intended_category` joining character vector and factor,
## coercing into character vector
## Warning: Column `clicked_category` joining character vector and factor,
## coercing into character vector
## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")
## Warning: Column `sessionId` joining character vector and factor, coercing
## into character vector
## Warning: Column `sketch_path` joining character vector and factor, coercing
## into character vector
## Warning: Column `intended_category` joining character vector and factor,
## coercing into character vector
## Warning: Column `clicked_category` joining character vector and factor,
## coercing into character vector
## keep an untouched copy (orig_d) alongside the working copy (d)
orig_d <- d <- recog_data

Make variables correct types in dataset

# One pass over the columns; mutate() evaluates its arguments in order, so
# each step sees the columns produced by the steps before it.
d <- d %>%
  mutate(
    # give both category columns the same set of levels
    clicked_category = as.factor(clicked_category),
    intended_category = factor(intended_category, levels = levels(clicked_category)),
    # compute accuracy
    correct_or_not = (clicked_category == intended_category),
    # text after the 'age' prefix (still a string at this point)
    recognizer_age_numeric = str_split_fixed(recognizer_age, 'age', 2)[, 2],
    # order the age bins youngest to oldest
    recognizer_age = factor(
      recognizer_age,
      levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult')
    )
  )

Data filtering

Filter non-compliant subjects & trials:

## Exclude adults, sessions that never got past the first few trials, and
## trials whose RTs are implausibly short or long

# all trials from sessions whose highest trial number is below 5
didnt_start <- d %>%
  group_by(sessionId) %>%
  mutate(count_trials = max(trial_num)) %>%
  filter(count_trials < 5)

# all trials completed by adult recognizers
adults <- filter(d, recognizer_age == 'adult')

# apply the three exclusions together
d <- d %>%
  filter(
    !sessionId %in% didnt_start$sessionId,
    !sessionId %in% adults$sessionId,
    RT > 100 & RT < 10000 # super long or super short trial
  )

Calculate performance on photo catch trials; visualize for each subject; compile list of off-task subjects

# minimum proportion correct on photo trials: 75%
threshold <- .75

# per-subject mean accuracy on photo trials
photo_correct <- d %>%
  filter(producer_age == "photo") %>%
  group_by(sessionId, recognizer_age) %>%
  summarize(avg_photo_correct = mean(correct_or_not))

# one jittered point per subject, split by age group, with the cutoff drawn in
photo_correct %>%
  ggplot(aes(x = recognizer_age, y = avg_photo_correct, col = recognizer_age)) +
  geom_jitter(alpha = .6) +
  geom_hline(yintercept = threshold) +
  scale_color_viridis(discrete = TRUE)

# subjects strictly below the cutoff are flagged for exclusion; subjects at
# exactly 75% are not flagged
bad_subs <- filter(photo_correct, avg_photo_correct < threshold)

Filter out subs who don’t meet photo correct threshold

# drop every session flagged in bad_subs
d <- filter(d, !sessionId %in% bad_subs$sessionId)

# sanity check: recompute photo-trial accuracy on what is left
photo_trials_by_sub <- d %>%
  filter(producer_age == 'photo') %>%
  group_by(sessionId) %>%
  summarize(avg_correct = mean(correct_or_not))

# no remaining subject may fall below the threshold
assert_that(sum(photo_trials_by_sub$avg_correct<threshold)==0)
## [1] TRUE

Finally, filter kids that didn’t have valid trials on both photo/sketch trials

# per-session counts and accuracy, computed separately for photo and sketch trials
cor_by_trial_type <- d %>%
  mutate(photo_or_not = (producer_age == 'photo')) %>%
  group_by(photo_or_not, sessionId) %>%
  summarize(
    count_cor = sum(correct_or_not),
    count_items = n(),
    avg_correct = count_cor / count_items
  )

# sessions that appear under only one of the two trial types
only_one_type <- cor_by_trial_type %>%
  group_by(sessionId) %>%
  summarize(count_ids = n()) %>%
  filter(count_ids == 1)

# remove those sessions
d <- filter(d, !sessionId %in% only_one_type$sessionId)

Calculate number of trials per kid (not adult) after these exclusions and report exclusions

# mean, across non-adult sessions, of each session's highest trial number
num_trials_per_kid <- d %>%
  # filter(!sessionId %in% adults$sessionId) %>% # exclude adults (prereg code, error)
  filter(recognizer_age != "adult") %>% # exclude adults
  group_by(sessionId) %>%
  summarize(max_trials = max(trial_num)) %>%
  summarize(average_trials = mean(max_trials))

# number of distinct non-adult sessions in each experiment x age cell
num_kids_per_exp <- d %>%
  filter(recognizer_age != "adult") %>% # exclude adults
  group_by(exp, recognizer_age) %>%
  summarize(num_subs = n_distinct(sessionId))

##

First, we excluded children who started the game but did not complete more than 1 trial after the practice trials (N = 1068 participants) and the 238 adults who participated. We also excluded all trials with RTs slower than 10s or faster than 100ms, judging these to be off-task responses. Next, we excluded participants on the basis of their performance on practice and catch trials; given that these catch trials presented a very easy recognition task, we excluded participants who did not achieve at least 75% accuracy on these trials (N = 1064). The remaining 1975 participants who met this criterion completed an average of 20.98 trials. In total, we analyzed 38913 trials where children recognized each other's drawings.

Exclusion rates in each age bin; see that we are mostly filtering out young kids not on task.

# Describe the sessions excluded for low photo-trial accuracy, using the
# unfiltered copy of the data: trial count per session, then number of
# sessions and mean trial count per age bin.
bad_subs_descriptives <- orig_d %>%
  filter(sessionId %in% bad_subs$sessionId) %>%
  group_by(sessionId) %>%
  summarize(
    count_trials = n(),
    recognizer_age = first(recognizer_age)
  ) %>%
  group_by(recognizer_age) %>%
  summarize(
    count_subs = n(),
    avg_trials = mean(count_trials)
  )

bad_subs_descriptives %>%
  kable()
recognizer_age count_subs avg_trials
age10 46 19.19565
age2 269 10.82528
age3 270 14.37778
age4 180 16.18889
age5 103 21.38835
age6 60 20.16667
age7 50 17.58000
age8 49 17.34694
age9 37 15.45946

Calculate number of subs left in each age

# distinct sessions remaining in each age bin
d %>%
  group_by(recognizer_age) %>%
  summarize(num_subs = n_distinct(sessionId)) %>%
  kable()
recognizer_age num_subs
age2 186
age3 329
age4 408
age5 301
age6 227
age7 188
age8 131
age9 84
age10 121

First set of descriptives

How are recognizers doing on photo trials across age? Looks pretty flat.

# Photo-trial accuracy by recognizer age. Accuracy is averaged within each
# participant first, then summarized per age bin by multi_boot_standard()
# (presumably a bootstrapped mean and CI -- it supplies the `mean`,
# `ci_lower` and `ci_upper` columns plotted below; confirm against its docs).
# The original's leading group_by(recognizer_age) was overwritten by the later
# group_by() before it had any effect, so it is dropped.
by_recognizer_photo <- d %>%
  filter(producer_age == 'photo') %>%
  group_by(sessionId, recognizer_age) %>%
  summarize(indiv_photo_correct = mean(correct_or_not)) %>% # average first over individual participants
  group_by(recognizer_age) %>%
  multi_boot_standard(col = 'indiv_photo_correct')

# make levels in reasonable order
by_recognizer_photo$recognizer_age <- factor(by_recognizer_photo$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))

ggplot(by_recognizer_photo, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical flag, not the string "TRUE"
  ylab('Proportion recognized - PHOTO') +
  ylim(.75, 1) + # can't be lower than 75% correct or higher than 1
  theme_few()

Look at overall recognition by recognizer age; looks like we should exclude 2 year-olds.

# Accuracy on drawing (non-photo) trials, pooled over trials within each
# recognizer age bin.
by_recognizer <- d %>%
  group_by(recognizer_age) %>%
  filter(producer_age != 'photo') %>%
  multi_boot_standard(col = 'correct_or_not')

# make levels in reasonable order
by_recognizer$recognizer_age <- factor(by_recognizer$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))

ggplot(by_recognizer, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical flag, not the string "TRUE"
  ylab('Proportion drawings recognized') +
  ylim(.25, .8) +
  # NOTE(review): x is a factor and colour splits the data by age, so this
  # smoother may have only one point per group to fit -- confirm it draws
  # what is intended.
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Look at use of “don’t know” button across age; roughly goes down with recognizer age.

# Proportion of trials on which the 'dont_know' option was clicked, by
# recognizer age bin.
by_recognizer_dont_know <- d %>%
  mutate(dont_know = (clicked_category == 'dont_know')) %>%
  group_by(recognizer_age) %>%
  multi_boot_standard(col = 'dont_know')

ggplot(by_recognizer_dont_know, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical flag, not the string "TRUE"
  ylab('Proportion trials selected "dont know"') +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Updated filtering

For later, fine-grain analyses: given uneven distribution of subjects and performance from 2-year-olds, create new age bins that (1) exclude 2-year-olds and (2) group together older kids.

## filter recognition data to just kids and wrangle variables
# Restrict to sketch trials from recognizers aged 3 and up, then derive:
#   recognizer_age_numeric       - age in years, parsed from the 'ageN' label
#   recognizer_age_group         - two-year age bins as a labelled factor
#   recognizer_age_group_numeric - the bin's lower age (3, 5, 7, 9) as a number
kids_sketches_d <- d %>%
  filter(producer_age != 'photo') %>% # don't look at photo trials
  filter(recognizer_age != 'age2') %>%  # or 2-yr-olds
  mutate(recognizer_age_numeric = as.numeric(str_split_fixed(recognizer_age, 'age', 2)[, 2])) %>%
  mutate(recognizer_age_group = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels = c("3-4 yrs","5-6 yrs","7-8 yrs","9-10 yrs"))) %>%
  mutate(recognizer_age_group_numeric = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels = c(3, 5, 7, 9))) %>%
  # as.numeric() applied directly to a factor returns its level codes (1-4),
  # not its labels; converting through as.character() recovers 3/5/7/9.
  # Anything that standardizes this column with scale() is unaffected, since
  # 3/5/7/9 is a linear function of 1-4.
  mutate(recognizer_age_group_numeric = as.numeric(as.character(recognizer_age_group_numeric)))

And wrangle sketch paths so can be joined with classification data outputs

# Reduce sketch_path to a bare sketch id: keep what follows the first '/',
# then what precedes the first '.png'. The extension is matched with fixed()
# because as a regex the '.' in '.png' matches any character, which could
# split at an earlier "<any char>png" inside the id.
kids_sketches_d <- kids_sketches_d %>%
  mutate(sketch_path = str_split_fixed(sketch_path, '/', 2)[, 2]) %>%
  mutate(sketch_path = as.factor(str_split_fixed(sketch_path, fixed('.png'), 2)[, 1]))

Now also look at number of included subjects in each age group and experiment

# distinct sessions in each age group x experiment cell
kids_sketches_d %>%
  group_by(recognizer_age_group, exp) %>%
  summarize(num_subs = n_distinct(sessionId)) %>%
  kable()
recognizer_age_group exp num_subs
3-4 yrs animalgame 111
3-4 yrs biganimalgame 211
3-4 yrs objectgame 291
3-4 yrs vehiclegame 124
5-6 yrs animalgame 63
5-6 yrs biganimalgame 174
5-6 yrs objectgame 190
5-6 yrs vehiclegame 101
7-8 yrs animalgame 33
7-8 yrs biganimalgame 81
7-8 yrs objectgame 137
7-8 yrs vehiclegame 68
9-10 yrs animalgame 37
9-10 yrs biganimalgame 57
9-10 yrs objectgame 77
9-10 yrs vehiclegame 34

Basic descriptives and plots

Examine recognition data

Examine recognition by recognizer’s age and experiment run

# number of sketch trials contributed by each recognizer age
# NOTE(review): these counts pool across experiments, while the plotted
# summaries are per age x experiment -- confirm this is the intended size
# scaling.
count_trials <- kids_sketches_d %>%
  group_by(recognizer_age) %>%
  summarize(count_trials = n())

# accuracy summary for every recognizer age x experiment cell
by_recognizer_filtered <- kids_sketches_d %>%
  group_by(recognizer_age, recognizer_age_numeric, exp) %>%
  multi_boot_standard(col = 'correct_or_not')

# keep the age bins in youngest-to-oldest order
by_recognizer_filtered$recognizer_age <- factor(
  by_recognizer_filtered$recognizer_age,
  levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult')
)

## Attach the trial counts (in thousands) as a point-size variable
by_recognizer_filtered <- by_recognizer_filtered %>%
  left_join(count_trials) %>%
  mutate(scale = count_trials / 1000) %>%
  group_by(exp)
## Joining, by = "recognizer_age"
# Accuracy by numeric recognizer age, one facet per experiment; point size
# follows `scale` and the dashed line sits at y = .25.
ggplot(by_recognizer_filtered, aes(x = recognizer_age_numeric, y = mean, col = recognizer_age_numeric, size = scale)) +
  theme_few(base_size = 18) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, color = 'grey') +
  scale_color_viridis(discrete = FALSE) + # logical flag, not the string "FALSE"
  ylab('Proportion drawings recognized') +
  scale_size_area(max_size = 1.5) +
  ylim(.25, .8) +
  xlab('Recognizer Age') +
  geom_hline(yintercept = .25, linetype = "dashed") +
  theme(axis.ticks.x = element_blank(), legend.position = 'none', aspect.ratio = 1) +
  facet_grid(~exp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_pointrange).

Also visualize by recognizer age GROUP x experiment run

## Doesn't seem quite right because there are such vastly different numbers of trials per kid.
# by_recognizer_filtered_age_group <- kids_sketches_d %>%
#   group_by(sessionId,recognizer_age_group,recognizer_age_group_numeric,exp) %>% 
#   summarize(indiv_photo_correct = mean(correct_or_not)) %>% # average first over individual participants
#   group_by(recognizer_age_group, recognizer_age_group_numeric,exp) %>%
#   multi_boot_standard(col = 'indiv_photo_correct')
# 
# count_subs_age_group <- kids_sketches_d %>%
#   distinct(sessionId,recognizer_age_group) %>%
#   group_by(recognizer_age_group) %>%
#   summarize(count_subs = n())

# number of sketch trials contributed by each age group
count_trials_age_group <- kids_sketches_d %>%
  group_by(recognizer_age_group) %>%
  summarize(count_trials = n())

# accuracy summary for every age group x experiment cell
by_recognizer_filtered_age_group <- kids_sketches_d %>%
  group_by(recognizer_age_group, recognizer_age_group_numeric, exp) %>%
  multi_boot_standard(col = 'correct_or_not')

## Attach the trial counts as a point-size variable
by_recognizer_filtered_age_group <- by_recognizer_filtered_age_group %>%
  left_join(count_trials_age_group) %>%
  mutate(scale = count_trials) %>%
  group_by(exp)
## Joining, by = "recognizer_age_group"
# Accuracy by recognizer age group, one facet per experiment; point size
# follows `scale` and the dashed line sits at y = .25.
ggplot(by_recognizer_filtered_age_group, aes(x = recognizer_age_group_numeric, y = mean, col = recognizer_age_group_numeric, size = scale)) +
  theme_few(base_size = 18) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  # geom_smooth(alpha=.2, color='grey') +
  scale_color_viridis(discrete = FALSE) + # logical flag, not the string "FALSE"
  ylab('Proportion drawings recognized') +
  scale_size_area(max_size = 1.5) +
  ylim(.25, .8) +
  xlab('Recognizer Age Group') +
  geom_hline(yintercept = .25, linetype = "dashed") +
  theme(axis.ticks.x = element_blank(), legend.position = 'none', aspect.ratio = 1) +
  facet_grid(~exp)

How does recognition vary with the age of the PRODUCER of the drawing? Goes up, as we would expect

# Accuracy summarized by the age of the drawing's producer, for non-adult
# recognizers.
by_producer <- d %>%
  filter(recognizer_age != 'adult') %>%
  group_by(producer_age) %>%
  multi_boot_standard(col = 'correct_or_not')

ggplot(by_producer, aes(x = producer_age, y = mean, col = producer_age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical flag, not the string "TRUE"
  ylab('Proportion recognized') +
  theme_few()

How does recognition break down by each category?

# accuracy summary for every producer age x intended category cell
both_category <- d %>%
  group_by(producer_age, intended_category) %>%
  multi_boot_standard(col = 'correct_or_not')

# one panel per category, producer age along the x axis
both_category %>%
  ggplot(aes(x = producer_age, y = mean, col = producer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  facet_wrap(~intended_category) +
  scale_color_viridis(discrete = TRUE) +
  labs(y = 'average correct') +
  theme(aspect.ratio = 1, axis.text.x = element_text(angle = 90, hjust = 1))

Compute and plot recognition by sketch distinctiveness

We expect that the distinctiveness of each drawing will be a major factor in how well it is recognized, and, further, that older children will be more sensitive to the presence of these distinctive features.

Import classification data (4-way classifications, separately for each run of recoggames)

# Classifier outputs, one file per recognition game. Each table gets:
#   denom    - summed probability of that game's four categories, minus the
#              probability assigned to the target label
#   log_odds - log(target_label_prob / denom)
#   exp      - the game the sketches belong to
# as_tibble() replaces the deprecated as.tibble().
c_vehiclegame <- read.csv("compiled_classifications/classification-outputs-vehiclegame_C_0.1_460.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (airplane_prob + train_prob + boat_prob + car_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>% ## compute log odd probability
  mutate(exp = 'vehiclegame')

c_animalgame <- read.csv("compiled_classifications/classification-outputs-animalgame_C_0.1_560.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (dog_prob + fish_prob + rabbit_prob + bird_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  mutate(exp = 'animalgame')

c_biganimalgame <- read.csv("compiled_classifications/classification-outputs-biganimalgame_C_0.1_600.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (bear_prob + sheep_prob + camel_prob + tiger_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  # these rows were previously tagged 'animalgame' (copy-paste slip)
  mutate(exp = 'biganimalgame')


# Read the objectgame outputs, stack the other three games onto them, and
# build a sketch_path key from target label, producer age and session id.
# NOTE(review): naming this table `c` masks base::c for value lookups; calls
# like c(1, 2) still resolve to the function, but a rename would be safer.
c <- read.csv("compiled_classifications/classification-outputs-objectgame_C_0.1_450.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (bottle_prob + lamp_prob + hat_prob + cup_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  mutate(exp = 'objectgame') %>%
  full_join(c_vehiclegame) %>%
  full_join(c_animalgame) %>%
  full_join(c_biganimalgame) %>%
  mutate(sketch_path = paste0(target_label,'_','sketch_age',age,'_cdm_',session_id)) %>%
  mutate(sketch_path = as.factor(sketch_path))
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
## Warning: Column `target_label` joining factors with different levels,
## coercing to character vector
## Warning: Column `session_id` joining factors with different levels,
## coercing to character vector
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
## Warning: Column `target_label` joining character vector and factor,
## coercing into character vector
## Warning: Column `session_id` joining character vector and factor, coercing
## into character vector
## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")
## Warning: Column `target_label` joining character vector and factor,
## coercing into character vector

## Warning: Column `session_id` joining character vector and factor, coercing
## into character vector

Check how log-odds varies with target label probability and classifier outcome

# log-odds against target-label probability, coloured by classifier
# correctness, one panel per target label
c %>%
  ggplot(aes(x = target_label_prob, y = log_odds, col = correct_or_not)) +
  geom_jitter(alpha = .1) +
  theme_few() +
  facet_wrap(~target_label)

Compute distinctiveness bins, join classifications & recognition data

# number of equal-sized distinctiveness bins
num_bins <- 10

## bin every sketch by its log odds, ranked over the entire classification set
sketch_by_distinctiveness <- c %>%
  transmute(
    sketch_path,
    distinct_index = ntile(log_odds, num_bins),
    log_odds
  )

## attach each sketch's bin and log odds to the recognition trials
all_joined <- left_join(kids_sketches_d, sketch_by_distinctiveness)
## Joining, by = "sketch_path"
## Warning: Column `sketch_path` joining factors with different levels,
## coercing to character vector

How does distinctiveness bin vary with classifier probability /correctness?

# log odds within each distinctiveness bin, one panel per intended category
all_joined %>%
  ggplot(aes(x = distinct_index, y = log_odds)) +
  geom_jitter(alpha = .03) +
  theme_few() +
  facet_wrap(~intended_category)
## Warning: Removed 1587 rows containing missing values (geom_point).

Plot distinctiveness by item effects

# accuracy summary for every intended category x distinctiveness bin cell
distinct_by_item <- all_joined %>%
  group_by(intended_category, distinct_index) %>%
  multi_boot_standard(col='correct_or_not') %>%
  group_by(distinct_index)

# one panel per category with a linear fit across bins
distinct_by_item %>%
  ggplot(aes(x = distinct_index, y = mean, col = intended_category)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 10, method = 'lm') +
  facet_wrap(~intended_category) +
  scale_x_continuous(breaks = seq(1, 10, 2)) +
  labs(x = 'Distinctiveness Index', y = 'Proportion recognized') +
  theme_few() +
  theme(legend.position = 'none')
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_pointrange).

Plot distinctiveness by age interaction

# accuracy summary for every recognizer age (in years) x distinctiveness bin cell
distinct_by_age <- all_joined %>%
  group_by(recognizer_age_numeric, distinct_index) %>%
  multi_boot_standard(col='correct_or_not') %>%
  group_by(distinct_index)

# one panel per age with a linear fit across bins; dashed line at y = .25
distinct_by_age %>%
  ggplot(aes(x = distinct_index, y = mean, col = recognizer_age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 10, method = 'lm') +
  geom_hline(yintercept = .25, linetype = 'dashed', color = 'grey') +
  facet_grid(~recognizer_age_numeric) +
  scale_color_viridis(discrete = FALSE) +
  scale_x_continuous(
    breaks = c(2, 9),
    # labels=c('Least \n distinctive','Most \n distinctive')
    labels = c(' Least ',' Most ')
  ) +
  labs(x = 'Drawing distinctiveness', y = 'Proportion drawings recognized') +
  theme_few(base_size = 18) +
  theme(legend.position = "none", axis.ticks.x = element_blank())
## Warning: Removed 8 rows containing non-finite values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_pointrange).

Plot distinctiveness by age group interaction

# accuracy summary for every age group x distinctiveness bin cell
# (this reuses, and so overwrites, the name distinct_by_age)
distinct_by_age <- all_joined %>%
  group_by(recognizer_age_group, distinct_index) %>%
  multi_boot_standard(col='correct_or_not') %>%
  group_by(distinct_index)

# build the figure, keep a handle to it, then display it
distinct_by_age_plot <- distinct_by_age %>%
  ggplot(aes(x = distinct_index, y = mean, col = recognizer_age_group)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 4, method = 'lm') +
  facet_grid(~recognizer_age_group) +
  scale_x_continuous(breaks = seq(1, 10, 2)) +
  scale_color_viridis(discrete = TRUE) +
  labs(x = 'Distinctiveness Index', y = 'Proportion recognized') +
  theme_few(base_size = 22) +
  theme(legend.position = 'none')
distinct_by_age_plot
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_pointrange).

# ggsave('DistinctByAge.svg',distinct_by_age_plot, width=11)

Inferential statistics

Examine how distinctiveness and age group interact in glmer

# Binomial mixed-effects model of trial accuracy.
# Fixed effects: standardized distinctiveness bin, standardized age-group
# code, and their interaction.
# Random effects: intercept and distinct_index slope per intended_category,
# plus an intercept per sessionId.
model_glmer <- glmer(correct_or_not ~ scale(distinct_index)*scale(recognizer_age_group_numeric) + (distinct_index|intended_category) + (1|sessionId),  data = all_joined, family='binomial')
summary(model_glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct_or_not ~ scale(distinct_index) * scale(recognizer_age_group_numeric) +  
##     (distinct_index | intended_category) + (1 | sessionId)
##    Data: all_joined
## 
##      AIC      BIC   logLik deviance df.resid 
##  32512.5  32577.8 -16248.2  32496.5    26033 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9426 -0.8200  0.2934  0.8222  3.0740 
## 
## Random effects:
##  Groups            Name           Variance Std.Dev. Corr 
##  sessionId         (Intercept)    0.250400 0.5004        
##  intended_category (Intercept)    0.237255 0.4871        
##                    distinct_index 0.002725 0.0522   -0.36
## Number of obs: 26041, groups:  sessionId, 1785; intended_category, 16
## 
## Fixed effects:
##                                                           Estimate
## (Intercept)                                                0.03209
## scale(distinct_index)                                      0.47327
## scale(recognizer_age_group_numeric)                        0.31436
## scale(distinct_index):scale(recognizer_age_group_numeric)  0.06317
##                                                           Std. Error
## (Intercept)                                                  0.11910
## scale(distinct_index)                                        0.04067
## scale(recognizer_age_group_numeric)                          0.01944
## scale(distinct_index):scale(recognizer_age_group_numeric)    0.01435
##                                                           z value Pr(>|z|)
## (Intercept)                                                 0.269    0.788
## scale(distinct_index)                                      11.637  < 2e-16
## scale(recognizer_age_group_numeric)                        16.169  < 2e-16
## scale(distinct_index):scale(recognizer_age_group_numeric)   4.402 1.07e-05
##                                                              
## (Intercept)                                                  
## scale(distinct_index)                                     ***
## scale(recognizer_age_group_numeric)                       ***
## scale(distinct_index):scale(recognizer_age_group_numeric) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) s(___)
## scl(dstnc_) 0.228               
## scl(rcg___) 0.011  0.017        
## sc(_):(___) 0.007  0.014  0.034

Inferential model #2: Recognizer age as a continuous variable

# Binomial mixed-effects model of trial accuracy with recognizer age in years
# (standardized) in place of the age-group code.
# Fixed effects: standardized distinctiveness bin, standardized age, and
# their interaction.
# Random effects: intercept and distinct_index slope per intended_category,
# plus an intercept per sessionId.
model_glmer_age_cont <- glmer(correct_or_not ~ scale(distinct_index)*scale(recognizer_age_numeric) + (distinct_index|intended_category) + (1|sessionId),  data = all_joined, family='binomial')
summary(model_glmer_age_cont)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct_or_not ~ scale(distinct_index) * scale(recognizer_age_numeric) +  
##     (distinct_index | intended_category) + (1 | sessionId)
##    Data: all_joined
## 
##      AIC      BIC   logLik deviance df.resid 
##  32487.0  32552.3 -16235.5  32471.0    26033 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8427 -0.8191  0.2923  0.8256  3.0615 
## 
## Random effects:
##  Groups            Name           Variance Std.Dev. Corr 
##  sessionId         (Intercept)    0.242376 0.49232       
##  intended_category (Intercept)    0.235771 0.48556       
##                    distinct_index 0.002744 0.05238  -0.35
## Number of obs: 26041, groups:  sessionId, 1785; intended_category, 16
## 
## Fixed effects:
##                                                     Estimate Std. Error
## (Intercept)                                          0.03258    0.11936
## scale(distinct_index)                                0.47425    0.04079
## scale(recognizer_age_numeric)                        0.32806    0.01934
## scale(distinct_index):scale(recognizer_age_numeric)  0.06884    0.01446
##                                                     z value Pr(>|z|)    
## (Intercept)                                           0.273    0.785    
## scale(distinct_index)                                11.627  < 2e-16 ***
## scale(recognizer_age_numeric)                        16.962  < 2e-16 ***
## scale(distinct_index):scale(recognizer_age_numeric)   4.762 1.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) sc(__)
## scl(dstnc_) 0.238               
## scl(rcgn__) 0.010  0.019        
## scl(_):(__) 0.008  0.014  0.040