Note that this report only covers “V2” of the experiment.
Data prep
Libraries.
library(readxl)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
theme_set(theme_bw())
Read sheets.
raw_data <- read_excel("data/summer2016-v2.xlsx", sheet = 1)
slide_info <- read_excel("data/summer2016-v2.xlsx", sheet = 2)
targets <- read_excel("data/summer2016-v2.xlsx", sheet = 3)
Now merge in stimulus information.
df <- raw_data %>%
mutate(subid = factor(paste0(as.character(id), "-",
as.character(date)))) %>%
gather(stimulus, choice, sacha_vaca:mototaxi) %>%
left_join(rename(slide_info, choice = code)) %>%
left_join(targets) %>%
mutate(correct = choice == target_code) %>%
select(-id)
## Joining, by = c("stimulus", "choice")
## Joining, by = c("order", "stimulus")
Primary analysis
Now get grand means.
mbs <- df %>%
group_by(trial_type, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE),
n = n())
ms <- mbs %>%
group_by(trial_type) %>%
multi_boot_standard(col = "correct")
## Joining, by = "trial_type"
ggplot(mbs, aes(x = trial_type, y = correct)) +
geom_jitter(width = .05, height = .05) +
geom_pointrange(col = "red", data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper)) +
geom_hline(lty = 2, yintercept = .33)

Performance is low. Check for developmental trends. Only older kids perform even above chance on control trials.
mbs <- df %>%
group_by(trial_type, age, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE),
n = n())
ms <- mbs %>%
group_by(trial_type, age) %>%
multi_boot_standard(col = "correct")
## Joining, by = c("trial_type", "age")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) +
geom_jitter(width = .05, height = .05) +
geom_linerange(data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .05)) +
geom_line(data = ms,
aes(y = mean)) +
geom_hline(lty = 2, yintercept = .33) +
scale_color_solarized() +
ylab("Proportion correct") +
xlab("Age (years)") +
ylim(0,1)
## Warning: Ignoring unknown aesthetics: y
## Warning: Removed 23 rows containing missing values (geom_point).

Sub analyses - items and items/age
Look at item effects.
mbs <- df %>%
group_by(trial_type, stimulus, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE),
n = n())
ms <- mbs %>%
group_by(trial_type, stimulus) %>%
multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "stimulus")
ggplot(mbs, aes(x = stimulus, y = correct, col = trial_type)) +
geom_pointrange(data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .05)) +
geom_line(data = ms,
aes(y = mean)) +
geom_hline(lty = 2, yintercept = .33) +
scale_color_solarized() +
ylab("Proportion correct") +
xlab("Age (years)") +
ylim(0,1)
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

And cross with age?
mbs <- df %>%
group_by(trial_type, age, stimulus, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE),
n = n())
ms <- mbs %>%
group_by(trial_type, age, stimulus) %>%
multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "age", "stimulus")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) +
facet_wrap(~stimulus) +
geom_pointrange(data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .05)) +
geom_line(data = ms,
aes(y = mean)) +
geom_hline(lty = 2, yintercept = .33) +
scale_color_solarized() +
ylab("Proportion correct") +
xlab("Age (years)") +
ylim(0,1)

Site analysis
mbs <- df %>%
group_by(trial_type, age, location, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE),
n = n())
ms <- mbs %>%
group_by(trial_type, age, location) %>%
multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "age", "location")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) +
facet_wrap(~location) +
geom_pointrange(data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .05)) +
geom_line(data = ms,
aes(y = mean)) +
geom_hline(lty = 2, yintercept = .33) +
scale_color_solarized() +
ylab("Proportion correct") +
xlab("Age (years)") +
ylim(0,1)

Choice distribution
Examine distribution of choices, rather than correctness.
It appears that most kids (at least until age 6) are not understanding the task much, but they do choose “both” slightly more than target. But they are also pretty close to chance.
mbs <- df %>%
filter(trial_type == "inference") %>%
group_by(age, subid, item_type) %>%
summarise(n = n()) %>%
group_by(age, subid) %>%
mutate(prop = n / sum(n),
item_type = factor(item_type)) %>%
complete(item_type, nesting(subid), fill = list(prop = 0))
ms <- mbs %>%
group_by(age, item_type) %>%
multi_boot_standard(col = "prop", na.rm=TRUE)
## Joining, by = c("age", "item_type")
ggplot(mbs, aes(x = age, y = prop, col = item_type)) +
geom_pointrange(data = ms,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .05)) +
geom_line(data = ms,
aes(y = mean)) +
geom_hline(lty = 2, yintercept = .33) +
scale_color_solarized() +
ylab("Proportion correct") +
xlab("Age (years)") +
ylim(0,1)
