Note that this report only covers “V2” of the experiment.

Data prep

Libraries.

library(readxl)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(langcog)
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
theme_set(theme_bw())

Read sheets.

raw_data <- read_excel("data/summer2016-v2.xlsx", sheet = 1)
slide_info <- read_excel("data/summer2016-v2.xlsx", sheet = 2)
targets <- read_excel("data/summer2016-v2.xlsx", sheet = 3)

Now merge in stimulus information.

df <- raw_data %>%
  mutate(subid = factor(paste0(as.character(id), "-",
                        as.character(date)))) %>%
  gather(stimulus, choice, sacha_vaca:mototaxi) %>%
  left_join(rename(slide_info, choice = code)) %>%
  left_join(targets) %>%
  mutate(correct = choice == target_code) %>%
  select(-id) 
## Joining, by = c("stimulus", "choice")
## Joining, by = c("order", "stimulus")

Primary analysis

Now get grand means.

mbs <- df %>%
  group_by(trial_type, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type) %>%
  multi_boot_standard(col = "correct")
## Joining, by = "trial_type"
ggplot(mbs, aes(x = trial_type, y = correct)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_pointrange(col = "red", data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper)) + 
  geom_hline(lty = 2, yintercept = .33)

Performance is low. Check for developmental trends. Only older kids perform even above chance on control trials.

mbs <- df %>%
  group_by(trial_type, age, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age) %>%
  multi_boot_standard(col = "correct")
## Joining, by = c("trial_type", "age")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_linerange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)
## Warning: Ignoring unknown aesthetics: y
## Warning: Removed 23 rows containing missing values (geom_point).

Sub analyses - items and items/age

Look at item effects.

mbs <- df %>%
  group_by(trial_type, stimulus, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, stimulus) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "stimulus")
ggplot(mbs, aes(x = stimulus, y = correct, col = trial_type)) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

And cross with age?

mbs <- df %>%
  group_by(trial_type, age, stimulus, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age, stimulus) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "age", "stimulus")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  facet_wrap(~stimulus) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

Site analysis

mbs <- df %>%
  group_by(trial_type, age, location, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age, location) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)
## Joining, by = c("trial_type", "age", "location")
ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  facet_wrap(~location) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

Choice distribution

Examine distribution of choices, rather than correctness.

It appears that most kids (at least until age 6) are not understanding the task much, but they do choose “both” slightly more than target. But they are also pretty close to chance.

mbs <- df %>%
  filter(trial_type == "inference") %>%
  group_by(age, subid, item_type) %>%
  summarise(n = n()) %>%
  group_by(age, subid) %>%
  mutate(prop = n / sum(n), 
         item_type = factor(item_type)) %>%
  complete(item_type, nesting(subid), fill = list(prop = 0))

ms <- mbs %>%
  group_by(age, item_type) %>%
  multi_boot_standard(col = "prop", na.rm=TRUE)
## Joining, by = c("age", "item_type")
ggplot(mbs, aes(x = age, y = prop, col = item_type)) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)