Preprocessing
Basic descriptives and plots
Inferential statistics
- Examine how distinctiveness vs age group interact in glmer
- Inferential model #2: Recognizer age as a continuous variable

Preprocessing

Load data

Import recognition data from each run of recoggames: here, animalgame & vehiclegame

#
# Read the animalgame recognition run, tag every row with its experiment name,
# and drop column `X` (presumably the row index saved with the CSV -- confirm).
# Uses as_tibble(); as.tibble() is deprecated.
animal_game <- read.csv("recognition_data/animalgame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'animalgame') %>%
  select(-X)

## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.

#
# Read the remaining three recognition runs. Each table is tagged with its
# experiment name in `exp` and has column `X` dropped (presumably the row
# index saved with the CSV -- confirm). Uses as_tibble(); as.tibble() is
# deprecated.
vehicle_game <- read.csv("recognition_data/vehiclegame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'vehiclegame') %>%
  select(-X)

biganimal_game <- read.csv("recognition_data/biganimalgame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'biganimalgame') %>%
  select(-X)

object_game <- read.csv("recognition_data/objectgame.csv") %>%
  as_tibble() %>%
  mutate(exp = 'objectgame') %>%
  select(-X)

# Combine the four runs into one table by folding full_join() over them,
# left to right; each join uses dplyr's default of all shared columns.
recog_data <- Reduce(
  full_join,
  list(animal_game, vehicle_game, biganimal_game, object_game)
)

## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")

## Warning: Column `sessionId` joining factors with different levels, coercing
## to character vector

## Warning: Column `sketch_path` joining factors with different levels,
## coercing to character vector

## Warning: Column `intended_category` joining factors with different levels,
## coercing to character vector

## Warning: Column `clicked_category` joining factors with different levels,
## coercing to character vector

## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")

## Warning: Column `sessionId` joining character vector and factor, coercing
## into character vector

## Warning: Column `sketch_path` joining character vector and factor, coercing
## into character vector

## Warning: Column `intended_category` joining character vector and factor,
## coercing into character vector

## Warning: Column `clicked_category` joining character vector and factor,
## coercing into character vector

## Joining, by = c("sessionId", "trial_num", "recognizer_age", "sketch_path", "intended_category", "producer_age", "clicked_category", "RT", "exp")

## Warning: Column `sessionId` joining character vector and factor, coercing
## into character vector

## Warning: Column `sketch_path` joining character vector and factor, coercing
## into character vector

## Warning: Column `intended_category` joining character vector and factor,
## coercing into character vector

## Warning: Column `clicked_category` joining character vector and factor,
## coercing into character vector

## keep an untouched copy (orig_d) next to the working copy (d)
orig_d <- recog_data
d <- orig_d

Make variables correct types in dataset

# Put all column typing in one mutate(); later expressions see earlier results.
d <- d %>%
  mutate(
    # give both category columns the same factor levels
    clicked_category = as.factor(clicked_category),
    intended_category = factor(intended_category, levels = levels(clicked_category)),
    # compute accuracy: a trial is correct when the click matches the target
    correct_or_not = (clicked_category == intended_category),
    # text after 'age' in the age label (still a character string here)
    recognizer_age_numeric = str_split_fixed(recognizer_age, 'age', 2)[, 2],
    # order the age bins from youngest to adult
    recognizer_age = factor(
      recognizer_age,
      levels = c('age2', 'age3', 'age4', 'age5', 'age6', 'age7', 'age8', 'age9', 'age10', 'adult')
    )
  )

Data filtering

Filter non-compliant subjects & trials:

## Exclude adults, sessions that barely started, and trials with RTs that are
## far too long or too short.

# all trials from adult recognizers
adults <- filter(d, recognizer_age == 'adult')

# sessions whose highest trial number is below 5
didnt_start <- d %>%
  group_by(sessionId) %>%
  mutate(count_trials = max(trial_num)) %>%
  filter(count_trials < 5)

# do the actual filtering here
d <- d %>%
  filter(
    !sessionId %in% didnt_start$sessionId,
    !sessionId %in% adults$sessionId,
    RT > 100 & RT < 10000 # super long or super short trial
  )

Calculate performance on photo catch trials; visualize for each subject; compile list of off-task subjects

# cutoff for accuracy on photo catch trials: 75% correct
threshold <- .75

# proportion correct on photo trials for each subject
photo_correct <- d %>%
  filter(producer_age == "photo") %>%
  group_by(sessionId, recognizer_age) %>%
  summarise(avg_photo_correct = mean(correct_or_not))

# one dot per subject, by age group, with the cutoff drawn as a line
ggplot(
  photo_correct,
  aes(x = recognizer_age, y = avg_photo_correct, col = recognizer_age)
) +
  geom_jitter(alpha = .6) +
  geom_hline(yintercept = threshold) +
  scale_color_viridis(discrete = TRUE)

# subjects strictly below the cutoff; a subject at exactly 75% is not listed
# here and therefore stays in the data
bad_subs <- filter(photo_correct, avg_photo_correct < threshold)

Filter out subs who don’t meet photo correct threshold

# drop every session listed in bad_subs
d <- filter(d, !sessionId %in% bad_subs$sessionId)

# sanity check: recompute photo-trial accuracy per subject on what remains
photo_trials_by_sub <- d %>%
  filter(producer_age == 'photo') %>%
  group_by(sessionId) %>%
  summarise(avg_correct = mean(correct_or_not))

# no remaining subject may be below the threshold
assert_that(sum(photo_trials_by_sub$avg_correct < threshold) == 0)

## [1] TRUE

Finally, filter kids that didn’t have valid trials on both photo/sketch trials

# per session, tally trials and correct responses separately for photo and
# non-photo trials
cor_by_trial_type <- d %>%
  mutate(photo_or_not = (producer_age == 'photo')) %>%
  group_by(photo_or_not, sessionId) %>%
  summarise(
    count_cor = sum(correct_or_not),
    count_items = n(),
    avg_correct = count_cor / count_items
  )

# sessions that show up under only one of the two trial types
only_one_type <- cor_by_trial_type %>%
  group_by(sessionId) %>%
  summarise(count_ids = n()) %>%
  filter(count_ids == 1)

# remove those sessions
d <- filter(d, !sessionId %in% only_one_type$sessionId)

Calculate number of trials per kid (not adult) after these exclusions and report exclusions

# average, over non-adult sessions, of each session's highest trial number
num_trials_per_kid <- d %>%
  # filter(!sessionId %in% adults$sessionId) %>% # exclude adults (prereg code, error)
  filter(recognizer_age != "adult") %>% # exclude adults
  group_by(sessionId) %>%
  summarise(max_trials = max(trial_num)) %>%
  summarise(average_trials = mean(max_trials))

# number of distinct non-adult sessions per experiment and age bin
num_kids_per_exp <- d %>%
  filter(recognizer_age != "adult") %>% # exclude adults
  group_by(exp, recognizer_age) %>%
  summarise(num_subs = n_distinct(sessionId))

##

First, we excluded children who started the game but did not complete more than 1 trial after the practice trials (N = 1068 participants) and the 238 adults who participated. We also excluded all trials with RTs slower than 10s or faster than 100ms, judging these to be off-task responses. Next, we excluded participants on the basis of their performance on practice and catch trials; given that these catch trials presented a very easy recognition task, we excluded participants who did not achieve at least 75% accuracy on these trials (N = 1064). The remaining 1975 participants who met this criterion completed an average of 20.98 trials. In total, we analyzed 38913 trials where children recognized each other's drawings.

Exclusion rates in each age bin; see that we are mostly filtering out young kids not on task.

# For excluded subjects (looked up in the unfiltered data): trials per session,
# then number of sessions and mean trial count per age bin.
bad_subs_descriptives <- orig_d %>%
  filter(sessionId %in% bad_subs$sessionId) %>%
  group_by(sessionId) %>%
  summarise(
    count_trials = n(),
    recognizer_age = first(recognizer_age)
  ) %>%
  group_by(recognizer_age) %>%
  summarise(
    count_subs = n(),
    avg_trials = mean(count_trials)
  )

kable(bad_subs_descriptives)

recognizer_age	count_subs	avg_trials
age10	46	19.19565
age2	269	10.82528
age3	270	14.37778
age4	180	16.18889
age5	103	21.38835
age6	60	20.16667
age7	50	17.58000
age8	49	17.34694
age9	37	15.45946

Calculate number of subs left in each age

# distinct sessions remaining in each age bin
d %>%
  group_by(recognizer_age) %>%
  summarise(num_subs = n_distinct(sessionId)) %>%
  kable()

recognizer_age	num_subs
age2	186
age3	329
age4	408
age5	301
age6	227
age7	188
age8	131
age9	84
age10	121

First set of descriptives

How are recognizers doing on photo trials across age? Looks pretty flat.

# Photo-trial accuracy by recognizer age: average within each participant
# first, then bootstrap across participants within each age bin.
# (The original's leading group_by(recognizer_age) was overridden by the later
# group_by() calls, so it is omitted.)
by_recognizer_photo <- d %>%
  filter(producer_age == 'photo') %>%
  group_by(sessionId, recognizer_age) %>%
  summarize(indiv_photo_correct = mean(correct_or_not)) %>% # average first over individual participants
  group_by(recognizer_age) %>%
  multi_boot_standard(col = 'indiv_photo_correct')

# keep the age bins in order from youngest to adult
by_recognizer_photo$recognizer_age <- factor(by_recognizer_photo$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))

ggplot(by_recognizer_photo, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical TRUE, not the string "TRUE"
  ylab('Proportion recognized - PHOTO') +
  ylim(.75, 1) + # can't be lower than 75% correct or higher than 1
  theme_few()

Look at overall recognition by recognizer age; looks like we should exclude 2 year-olds.

# Trial-level bootstrap of accuracy on non-photo trials within each age bin.
by_recognizer <- d %>%
  group_by(recognizer_age) %>%
  filter(producer_age != 'photo') %>%
  multi_boot_standard(col = 'correct_or_not')

# make levels in reasonable order
by_recognizer$recognizer_age <- factor(by_recognizer$recognizer_age, levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult'))

ggplot(by_recognizer, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical TRUE, not the string "TRUE"
  ylab('Proportion drawings recognized') +
  ylim(.25, .8) +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Look at use of “don’t know” button across age; roughly goes down with recognizer age.

# Proportion of trials on which the 'dont_know' option was clicked,
# bootstrapped over trials within each age bin.
by_recognizer_dont_know <- d %>%
  mutate(dont_know = (clicked_category == 'dont_know')) %>%
  group_by(recognizer_age) %>%
  multi_boot_standard(col = 'dont_know')

ggplot(by_recognizer_dont_know, aes(x = recognizer_age, y = mean, col = recognizer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical TRUE, not the string "TRUE"
  ylab('Proportion trials selected "dont know"') +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Updated filtering

For later, fine-grain analyses: given uneven distribution of subjects and performance from 2-year-olds, create new age bins that (1) exclude 2-year-olds and (2) group together older kids.

## filter recognition data to just kids' sketch trials and wrangle age variables
kids_sketches_d <- d %>%
  filter(producer_age != 'photo') %>% # don't look at photo trials
  filter(recognizer_age != 'age2') %>%  # or 2-yr-olds
  # numeric age parsed from the text after 'age' in the age label
  mutate(recognizer_age_numeric = as.numeric(str_split_fixed(recognizer_age, 'age', 2)[, 2])) %>%
  # two-year bins: (2.9,4], (4,6], (6,8], (8,10.1]
  mutate(recognizer_age_group = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels = c("3-4 yrs","5-6 yrs","7-8 yrs","9-10 yrs"))) %>%
  mutate(recognizer_age_group_numeric = cut(recognizer_age_numeric, c(2.9, 4, 6, 8, 10.1), labels = c(3, 5, 7, 9))) %>%
  # Convert through character: as.numeric() on a factor returns the level
  # codes (1..4) rather than the labels (3, 5, 7, 9).
  mutate(recognizer_age_group_numeric = as.numeric(as.character(recognizer_age_group_numeric)))

And wrangle sketch paths so can be joined with classification data outputs

# Reduce sketch_path to the part after the first '/' and before '.png'.
# Both separators are matched literally with fixed(); as a regex, '.png'
# would also split on any character followed by "png".
kids_sketches_d <- kids_sketches_d %>%
  mutate(
    sketch_path = str_split_fixed(sketch_path, fixed('/'), 2)[, 2],
    sketch_path = str_split_fixed(sketch_path, fixed('.png'), 2)[, 1],
    sketch_path = as.factor(sketch_path)
  )

Now also look at number of included subjects in each age group and experiment

# distinct sessions in each age group x experiment cell
kids_sketches_d %>%
  group_by(recognizer_age_group, exp) %>%
  summarise(num_subs = n_distinct(sessionId)) %>%
  kable()

recognizer_age_group	exp	num_subs
3-4 yrs	animalgame	111
3-4 yrs	biganimalgame	211
3-4 yrs	objectgame	291
3-4 yrs	vehiclegame	124
5-6 yrs	animalgame	63
5-6 yrs	biganimalgame	174
5-6 yrs	objectgame	190
5-6 yrs	vehiclegame	101
7-8 yrs	animalgame	33
7-8 yrs	biganimalgame	81
7-8 yrs	objectgame	137
7-8 yrs	vehiclegame	68
9-10 yrs	animalgame	37
9-10 yrs	biganimalgame	57
9-10 yrs	objectgame	77
9-10 yrs	vehiclegame	34

Basic descriptives and plots

Examine recognition data

Examine recognition by recognizer’s age and experiment run

# number of sketch trials in each age bin (pooled over experiments)
count_trials <- kids_sketches_d %>%
  group_by(recognizer_age) %>%
  summarise(count_trials = n())

# bootstrapped accuracy for each age bin within each experiment run
by_recognizer_filtered <- kids_sketches_d %>%
  group_by(recognizer_age, recognizer_age_numeric, exp) %>%
  multi_boot_standard(col = 'correct_or_not')

by_recognizer_filtered$recognizer_age <- factor(
  by_recognizer_filtered$recognizer_age,
  levels = c('age2','age3','age4','age5','age6','age7','age8','age9','age10','adult')
)

## Attach trial counts so dots can be sized by the number of trials in each bin
by_recognizer_filtered <- by_recognizer_filtered %>%
  left_join(count_trials) %>%
  mutate(scale = count_trials / 1000) %>%
  group_by(exp)

## Joining, by = "recognizer_age"

# Accuracy by numeric recognizer age, one panel per experiment; point size
# follows `scale` and the dashed line is drawn at .25.
ggplot(by_recognizer_filtered, aes(x = recognizer_age_numeric, y = mean, col = recognizer_age_numeric, size = scale)) +
  theme_few(base_size = 18) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, color = 'grey') +
  scale_color_viridis(discrete = FALSE) + # logical FALSE, not the string "FALSE"
  ylab('Proportion drawings recognized') +
  scale_size_area(max_size = 1.5) +
  ylim(.25, .8) +
  xlab('Recognizer Age') +
  geom_hline(yintercept = .25, linetype = "dashed") +
  theme(axis.ticks.x = element_blank(), legend.position = 'none', aspect.ratio = 1) +
  facet_grid(~exp)

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_pointrange).

Also visualize by recognizer age GROUP x experiment run

## Doesn't seem quite right because there are such vastly different numbers of trials per kid.
# by_recognizer_filtered_age_group <- kids_sketches_d %>%
#   group_by(sessionId,recognizer_age_group,recognizer_age_group_numeric,exp) %>% 
#   summarize(indiv_photo_correct = mean(correct_or_not)) %>% # average first over individual participants
#   group_by(recognizer_age_group, recognizer_age_group_numeric,exp) %>%
#   multi_boot_standard(col = 'indiv_photo_correct')
# 
# count_subs_age_group <- kids_sketches_d %>%
#   distinct(sessionId,recognizer_age_group) %>%
#   group_by(recognizer_age_group) %>%
#   summarize(count_subs = n())

# number of sketch trials in each age group (pooled over experiments)
count_trials_age_group <- kids_sketches_d %>%
  group_by(recognizer_age_group) %>%
  summarise(count_trials = n())

# trial-level bootstrap of accuracy per age group within each experiment run
by_recognizer_filtered_age_group <- kids_sketches_d %>%
  group_by(recognizer_age_group, recognizer_age_group_numeric, exp) %>%
  multi_boot_standard(col = 'correct_or_not')

## Attach trial counts so dots can be sized by the number of trials in each bin
by_recognizer_filtered_age_group <- by_recognizer_filtered_age_group %>%
  left_join(count_trials_age_group) %>%
  mutate(scale = count_trials) %>%
  group_by(exp)

## Joining, by = "recognizer_age_group"

# Accuracy by recognizer age group, one panel per experiment; point size
# follows `scale` and the dashed line is drawn at .25.
ggplot(by_recognizer_filtered_age_group, aes(x = recognizer_age_group_numeric, y = mean, col = recognizer_age_group_numeric, size = scale)) +
  theme_few(base_size = 18) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  # geom_smooth(alpha=.2, color='grey') +
  scale_color_viridis(discrete = FALSE) + # logical FALSE, not the string "FALSE"
  ylab('Proportion drawings recognized') +
  scale_size_area(max_size = 1.5) +
  ylim(.25, .8) +
  xlab('Recognizer Age Group') +
  geom_hline(yintercept = .25, linetype = "dashed") +
  theme(axis.ticks.x = element_blank(), legend.position = 'none', aspect.ratio = 1) +
  facet_grid(~exp)

How does recognition vary with the age of the PRODUCER of the drawing? Goes up, as we would expect

# summarize avg correct by producer age (non-adult recognizers only),
# bootstrapped over trials
by_producer <- d %>%
  filter(recognizer_age != 'adult') %>%
  group_by(producer_age) %>%
  multi_boot_standard(col = 'correct_or_not')

ggplot(by_producer, aes(x = producer_age, y = mean, col = producer_age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) + # logical TRUE, not the string "TRUE"
  ylab('Proportion recognized') +
  theme_few()

How does recognition break down by each category?

# bootstrapped accuracy for every producer age x intended category cell
both_category <- d %>%
  group_by(producer_age, intended_category) %>%
  multi_boot_standard(col = 'correct_or_not')

# one panel per category, producer age along the x axis
ggplot(both_category, aes(x = producer_age, y = mean, col = producer_age)) +
  theme_few() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_color_viridis(discrete = TRUE) +
  labs(y = 'average correct') +
  facet_wrap(~intended_category) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    aspect.ratio = 1
  )

Compute and plot recognition by sketch distinctiveness

We expect that the distinctiveness of each drawing will be a major factor in how well it is recognized, and, further, that older children will be more sensitive to the presence of these distinctive features.

Import classifications data (4-way classifications separately for each run of recoggames)

# Read the classifier outputs for each run. For every sketch:
#   denom    = summed probability of the four categories minus the target's
#   log_odds = log(target_label_prob / denom)
# Columns X.1 and X are dropped (presumably saved row indices -- confirm).
# Uses as_tibble(); as.tibble() is deprecated.
c_vehiclegame <- read.csv("compiled_classifications/classification-outputs-vehiclegame_C_0.1_460.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (airplane_prob + train_prob + boat_prob + car_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>% ## compute log odd probability
  mutate(exp = 'vehiclegame')

c_animalgame <- read.csv("compiled_classifications/classification-outputs-animalgame_C_0.1_560.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (dog_prob + fish_prob + rabbit_prob + bird_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  mutate(exp = 'animalgame')

c_biganimalgame <- read.csv("compiled_classifications/classification-outputs-biganimalgame_C_0.1_600.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (bear_prob + sheep_prob + camel_prob + tiger_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  # was mislabelled 'animalgame'; use the run's own name, as in the
  # recognition data
  mutate(exp = 'biganimalgame')

# Read the objectgame outputs, combine with the other three runs, and build a
# sketch_path key from target label, age and session id for joining with the
# recognition data.
c <- read.csv("compiled_classifications/classification-outputs-objectgame_C_0.1_450.csv") %>%
  as_tibble() %>%
  select(-X.1, -X) %>%
  mutate(denom = (bottle_prob + lamp_prob + hat_prob + cup_prob) - target_label_prob) %>%
  mutate(log_odds = log(target_label_prob / denom)) %>%
  mutate(exp = 'objectgame') %>%
  full_join(c_vehiclegame) %>%
  full_join(c_animalgame) %>%
  full_join(c_biganimalgame) %>%
  mutate(sketch_path = paste0(target_label, '_', 'sketch_age', age, '_cdm_', session_id)) %>%
  mutate(sketch_path = as.factor(sketch_path))

## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")

## Warning: Column `target_label` joining factors with different levels,
## coercing to character vector

## Warning: Column `session_id` joining factors with different levels,
## coercing to character vector

## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")

## Warning: Column `target_label` joining character vector and factor,
## coercing into character vector

## Warning: Column `session_id` joining character vector and factor, coercing
## into character vector

## Joining, by = c("index", "age", "target_label", "session_id", "correct_or_not", "target_label_prob", "denom", "log_odds", "exp")

## Warning: Column `target_label` joining character vector and factor,
## coercing into character vector

## Warning: Column `session_id` joining character vector and factor, coercing
## into character vector

Check how log-odds varies with target label probability and classifier outcome

# log odds against target-label probability, coloured by correct_or_not,
# one panel per target label
ggplot(
  c,
  aes(x = target_label_prob, y = log_odds, col = correct_or_not)
) +
  theme_few() +
  geom_jitter(alpha = .1) +
  facet_wrap(~target_label)

Compute distinctiveness bins, join classifications & recognition data

# number of distinctiveness bins
num_bins <- 10

## rank sketches into num_bins equal-sized bins by log odds, computed over the
## entire classification dataset
sketch_by_distinctiveness <- c %>%
  mutate(distinct_index = ntile(log_odds, num_bins)) %>%
  select(sketch_path, distinct_index, log_odds)

## attach each sketch's bin and log odds to the recognition trials
all_joined <- left_join(kids_sketches_d, sketch_by_distinctiveness)

## Joining, by = "sketch_path"

## Warning: Column `sketch_path` joining factors with different levels,
## coercing to character vector

How does distinctiveness bin vary with classifier probability /correctness?

# log odds within each distinctiveness bin, one panel per intended category
ggplot(
  all_joined,
  aes(x = distinct_index, y = log_odds)
) +
  theme_few() +
  geom_jitter(alpha = .03) +
  facet_wrap(~intended_category)

## Warning: Removed 1587 rows containing missing values (geom_point).

Plot distinctiveness by item effects

# bootstrapped accuracy for every category x distinctiveness bin
distinct_by_item <- all_joined %>%
  group_by(intended_category, distinct_index) %>%
  multi_boot_standard(col = 'correct_or_not') %>%
  group_by(distinct_index)

# accuracy against distinctiveness with a linear fit, one panel per category
ggplot(
  distinct_by_item,
  aes(x = distinct_index, y = mean, col = intended_category)
) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 10, method = 'lm') +
  theme_few() +
  theme(legend.position = 'none') +
  scale_x_continuous(breaks = seq(1, 10, 2)) +
  labs(x = 'Distinctiveness Index', y = 'Proportion recognized') +
  facet_wrap(~intended_category)

## Warning: Removed 5 rows containing non-finite values (stat_smooth).

## Warning: Removed 5 rows containing missing values (geom_pointrange).

Plot distinctiveness by age interaction

# bootstrapped accuracy for every numeric age x distinctiveness bin
distinct_by_age <- all_joined %>%
  group_by(recognizer_age_numeric, distinct_index) %>%
  multi_boot_standard(col = 'correct_or_not') %>%
  group_by(distinct_index)

# accuracy against distinctiveness with a linear fit, one panel per age;
# the dashed grey line is drawn at .25
ggplot(
  distinct_by_age,
  aes(x = distinct_index, y = mean, col = recognizer_age_numeric)
) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 10, method = 'lm') +
  geom_hline(yintercept = .25, linetype = 'dashed', color = 'grey') +
  facet_grid(~recognizer_age_numeric) +
  scale_color_viridis(discrete = FALSE) +
  scale_x_continuous(
    breaks = c(2, 9),
    # labels=c('Least \n distinctive','Most \n distinctive')
    labels = c(' Least ', ' Most ')
  ) +
  labs(x = 'Drawing distinctiveness', y = 'Proportion drawings recognized') +
  theme_few(base_size = 18) +
  theme(legend.position = "none", axis.ticks.x = element_blank())

## Warning: Removed 8 rows containing non-finite values (stat_smooth).

## Warning: Removed 8 rows containing missing values (geom_pointrange).

Plot distinctiveness by age group interaction

# bootstrapped accuracy for every age group x distinctiveness bin
# (this reassigns distinct_by_age)
distinct_by_age <- all_joined %>%
  group_by(recognizer_age_group, distinct_index) %>%
  multi_boot_standard(col = 'correct_or_not') %>%
  group_by(distinct_index)

# build the plot, keep it in distinct_by_age_plot, then print it
distinct_by_age_plot <- ggplot(
  distinct_by_age,
  aes(x = distinct_index, y = mean, col = recognizer_age_group)
) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha = .2, span = 4, method = 'lm') +
  facet_grid(~recognizer_age_group) +
  scale_color_viridis(discrete = TRUE) +
  scale_x_continuous(breaks = seq(1, 10, 2)) +
  labs(x = 'Distinctiveness Index', y = 'Proportion recognized') +
  theme_few(base_size = 22) +
  theme(legend.position = 'none')
distinct_by_age_plot

## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_pointrange).

# ggsave('DistinctByAge.svg',distinct_by_age_plot, width=11)

Inferential statistics

Examine how distinctiveness vs age group interact in glmer

# Binomial mixed model: scaled distinctiveness x scaled age group, with a
# by-category random intercept and distinctiveness slope and a by-session
# random intercept.
model_glmer <- glmer(
  correct_or_not ~ scale(distinct_index)*scale(recognizer_age_group_numeric) + (distinct_index|intended_category) + (1|sessionId),
  data = all_joined,
  family = 'binomial'
)
summary(model_glmer)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct_or_not ~ scale(distinct_index) * scale(recognizer_age_group_numeric) +  
##     (distinct_index | intended_category) + (1 | sessionId)
##    Data: all_joined
## 
##      AIC      BIC   logLik deviance df.resid 
##  32512.5  32577.8 -16248.2  32496.5    26033 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9426 -0.8200  0.2934  0.8222  3.0740 
## 
## Random effects:
##  Groups            Name           Variance Std.Dev. Corr 
##  sessionId         (Intercept)    0.250400 0.5004        
##  intended_category (Intercept)    0.237255 0.4871        
##                    distinct_index 0.002725 0.0522   -0.36
## Number of obs: 26041, groups:  sessionId, 1785; intended_category, 16
## 
## Fixed effects:
##                                                           Estimate
## (Intercept)                                                0.03209
## scale(distinct_index)                                      0.47327
## scale(recognizer_age_group_numeric)                        0.31436
## scale(distinct_index):scale(recognizer_age_group_numeric)  0.06317
##                                                           Std. Error
## (Intercept)                                                  0.11910
## scale(distinct_index)                                        0.04067
## scale(recognizer_age_group_numeric)                          0.01944
## scale(distinct_index):scale(recognizer_age_group_numeric)    0.01435
##                                                           z value Pr(>|z|)
## (Intercept)                                                 0.269    0.788
## scale(distinct_index)                                      11.637  < 2e-16
## scale(recognizer_age_group_numeric)                        16.169  < 2e-16
## scale(distinct_index):scale(recognizer_age_group_numeric)   4.402 1.07e-05
##                                                              
## (Intercept)                                                  
## scale(distinct_index)                                     ***
## scale(recognizer_age_group_numeric)                       ***
## scale(distinct_index):scale(recognizer_age_group_numeric) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) s(___)
## scl(dstnc_) 0.228               
## scl(rcg___) 0.011  0.017        
## sc(_):(___) 0.007  0.014  0.034

Inferential model #2: Recognizer age as a continuous variable

# Same model structure, with scaled numeric recognizer age in place of the
# age-group variable.
model_glmer_age_cont <- glmer(
  correct_or_not ~ scale(distinct_index)*scale(recognizer_age_numeric) + (distinct_index|intended_category) + (1|sessionId),
  data = all_joined,
  family = 'binomial'
)
summary(model_glmer_age_cont)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct_or_not ~ scale(distinct_index) * scale(recognizer_age_numeric) +  
##     (distinct_index | intended_category) + (1 | sessionId)
##    Data: all_joined
## 
##      AIC      BIC   logLik deviance df.resid 
##  32487.0  32552.3 -16235.5  32471.0    26033 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8427 -0.8191  0.2923  0.8256  3.0615 
## 
## Random effects:
##  Groups            Name           Variance Std.Dev. Corr 
##  sessionId         (Intercept)    0.242376 0.49232       
##  intended_category (Intercept)    0.235771 0.48556       
##                    distinct_index 0.002744 0.05238  -0.35
## Number of obs: 26041, groups:  sessionId, 1785; intended_category, 16
## 
## Fixed effects:
##                                                     Estimate Std. Error
## (Intercept)                                          0.03258    0.11936
## scale(distinct_index)                                0.47425    0.04079
## scale(recognizer_age_numeric)                        0.32806    0.01934
## scale(distinct_index):scale(recognizer_age_numeric)  0.06884    0.01446
##                                                     z value Pr(>|z|)    
## (Intercept)                                           0.273    0.785    
## scale(distinct_index)                                11.627  < 2e-16 ***
## scale(recognizer_age_numeric)                        16.962  < 2e-16 ***
## scale(distinct_index):scale(recognizer_age_numeric)   4.762 1.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(_) sc(__)
## scl(dstnc_) 0.238               
## scl(rcgn__) 0.010  0.019        
## scl(_):(__) 0.008  0.014  0.040

Drawing-Recognition-Analyses-Prereg

Bria Long

5/8/2019, updated 7/2019