Note that this report only covers “V2” of the experiment.

Data prep

Libraries.

library(readxl)
library(tidyverse)

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Warning: package 'ggplot2' was built under R version 3.3.2

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

theme_set(theme_bw())

Read sheets.

raw_data <- read_excel("data/summer2016-v2.xlsx", sheet = 1)
slide_info <- read_excel("data/summer2016-v2.xlsx", sheet = 2)
targets <- read_excel("data/summer2016-v2.xlsx", sheet = 3)

Now merge in stimulus information.

df <- raw_data %>%
  mutate(subid = factor(paste0(as.character(id), "-",
                        as.character(date)))) %>%
  gather(stimulus, choice, sacha_vaca:mototaxi) %>%
  left_join(rename(slide_info, choice = code)) %>%
  left_join(targets) %>%
  mutate(correct = choice == target_code) %>%
  select(-id)

## Joining, by = c("stimulus", "choice")

## Joining, by = c("order", "stimulus")

Primary analysis

Now get grand means.

mbs <- df %>%
  group_by(trial_type, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type) %>%
  multi_boot_standard(col = "correct")

## Joining, by = "trial_type"

ggplot(mbs, aes(x = trial_type, y = correct)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_pointrange(col = "red", data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper)) + 
  geom_hline(lty = 2, yintercept = .33)

Performance is low. Check for developmental trends. Only older kids perform even above chance on control trials.

mbs <- df %>%
  group_by(trial_type, age, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age) %>%
  multi_boot_standard(col = "correct")

## Joining, by = c("trial_type", "age")

ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_linerange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

## Warning: Ignoring unknown aesthetics: y

## Warning: Removed 23 rows containing missing values (geom_point).

Sub analyses - items and items/age

Look at item effects.

mbs <- df %>%
  group_by(trial_type, stimulus, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, stimulus) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)

## Joining, by = c("trial_type", "stimulus")

ggplot(mbs, aes(x = stimulus, y = correct, col = trial_type)) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

And cross with age?

mbs <- df %>%
  group_by(trial_type, age, stimulus, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age, stimulus) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)

## Joining, by = c("trial_type", "age", "stimulus")

ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  facet_wrap(~stimulus) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

Site analysis

mbs <- df %>%
  group_by(trial_type, age, location, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(trial_type, age, location) %>%
  multi_boot_standard(col = "correct", na.rm=TRUE)

## Joining, by = c("trial_type", "age", "location")

ggplot(mbs, aes(x = age, y = correct, col = trial_type)) + 
  facet_wrap(~location) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

Choice distribution

Examine distribution of choices, rather than correctness.

It appears that most kids (at least until age 6) are not understanding the task much, but they do choose “both” slightly more than target. But they are also pretty close to chance.

mbs <- df %>%
  filter(trial_type == "inference") %>%
  group_by(age, subid, item_type) %>%
  summarise(n = n()) %>%
  group_by(age, subid) %>%
  mutate(prop = n / sum(n), 
         item_type = factor(item_type)) %>%
  complete(item_type, nesting(subid), fill = list(prop = 0))

ms <- mbs %>%
  group_by(age, item_type) %>%
  multi_boot_standard(col = "prop", na.rm=TRUE)

## Joining, by = c("age", "item_type")

ggplot(mbs, aes(x = age, y = prop, col = item_type)) + 
  geom_pointrange(data = ms, 
                  aes(y = mean, ymin = ci_lower, ymax = ci_upper), 
                 position = position_dodge(width = .05)) + 
  geom_line(data = ms, 
                  aes(y = mean)) + 
  geom_hline(lty = 2, yintercept = .33) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1)

Analysis of Amazon Stiller Task Data

Martin Fortier, Danielle Kellier, and Mike Frank

2016-12-14

Data prep

Primary analysis

Sub analyses - items and items/age

Site analysis

Choice distribution