true

This report covers 2017 data collection.

Data prep

Libraries.

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.4.2

## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.3.4     ✔ dplyr   0.7.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0

## Warning: package 'tidyr' was built under R version 3.4.2

## Warning: package 'dplyr' was built under R version 3.4.2

## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(stringr)
library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

library(rms)

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:tidyr':
## 
##     expand

library(brms)

## Loading required package: Rcpp

## Warning: package 'Rcpp' was built under R version 3.4.2

## Loading 'brms' package (version 1.10.2). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.

## 
## Attaching package: 'brms'

## The following object is masked from 'package:lme4':
## 
##     ngrps

## The following object is masked from 'package:survival':
## 
##     kidney

library(knitr)
library(BayesFactor)

## Loading required package: coda

## ************
## Welcome to BayesFactor 0.9.12-2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************

library(forcats)
library(ggthemes)

## 
## Attaching package: 'ggthemes'

## The following objects are masked from 'package:langcog':
## 
##     scale_color_solarized, scale_colour_solarized,
##     scale_fill_solarized

theme_set(theme_few())

Read sheets.

raw_data <- read_csv("data/amazon_pragmatics_data.csv")

## Parsed with column specification:
## cols(
##   Age = col_double(),
##   Edad = col_character(),
##   Genero = col_character(),
##   `Lugar de grabacion` = col_character(),
##   Grabacion_order = col_integer(),
##   Grabacion_date = col_character(),
##   Orden = col_integer(),
##   `Trial 1` = col_character(),
##   `Trial 2` = col_character(),
##   `Trial 3` = col_character(),
##   `Trial 4` = col_character(),
##   `Trial 5` = col_character(),
##   `Trial 6` = col_character(),
##   `Trial 7` = col_character(),
##   `Trial 8` = col_character(),
##   `Trial 9` = col_character(),
##   `Trial 10` = col_character()
## )

orders <- read_csv("data/amazon_pragmatics_orders.csv")

## Parsed with column specification:
## cols(
##   Order = col_integer(),
##   Trial = col_integer(),
##   TrialClass = col_character(),
##   L = col_character(),
##   R = col_character(),
##   Target = col_character(),
##   `Trial Type` = col_character(),
##   Stimulus = col_character()
## )

Now merge in stimulus information.

d <- raw_data %>%
  mutate(Subid = 1:n()) %>%
  rename(Order = Orden, 
         Gender = Genero) %>%
  gather(Trial, Choice, `Trial 1`:`Trial 10`) %>%
  mutate(Trial = as.numeric(str_sub(Trial,start = -2, -1))) %>%
  left_join(orders) %>%
  rename(TrialType = `Trial Type`) %>% 
  mutate(Correct = Choice == Target, 
         TargetStimulus = ifelse(Target == "L", L, R)) %>%
  select(Subid, Age, Gender, Order, Trial, Choice, Target, TrialType, Stimulus, TargetStimulus, Correct) %>%
  mutate(age_grp = case_when(
    Age > 4 & Age <= 6 ~ "4 - 6",
    Age > 6 & Age <= 8 ~ "6 - 8",
    Age > 8 & Age <= 10 ~ "8 - 10",
    TRUE ~ "exclude"),
    TrialType = fct_recode(TrialType, 
                           `Control-Double` = "ControlDouble",
                           `Control-Single` = "ControlSingle",
                           `Warm-Up` = "Warm-up",
                           `Pragmatic Inference` = "Test"))

## Joining, by = c("Order", "Trial")

subs <- d %>%
  group_by(Subid, age_grp) %>%
  summarise(Age = Age[1], 
            n_trials = sum(!is.na(Correct)), 
            correct = mean(Correct), 
            gender = Gender[1]) 

ggplot(subs, aes(x = Age)) + 
  geom_histogram(binwidth = .5, center = 0)

## Warning: Removed 6 rows containing non-finite values (stat_bin).

subs %>%
  group_by(age_grp) %>%
  summarise(mean_age = mean(Age, na.rm=TRUE),
            sd_age = sd(Age, na.rm=TRUE),
            max_age = max(Age, na.rm=TRUE), 
            min_age = min(Age, na.rm=TRUE),
            n_trials = mean(n_trials), 
            n = n(), 
            missing_age = sum(age_grp == "exclude"), 
            n_male = sum(gender == "M"), 
            prop_male = n_male / n) %>%
  kable(digits = 2)

age_grp	mean_age	sd_age	max_age	min_age	n_trials	n	missing_age	n_male	prop_male
4 - 6	5.43	0.49	6.0	4.3	10.00	11	0	3	0.27
6 - 8	7.09	0.50	8.0	6.1	9.93	30	0	16	0.53
8 - 10	9.00	0.59	10.0	8.1	10.00	35	0	13	0.37
exclude	10.45	0.49	10.8	10.1	10.00	8	8	3	0.38

Primary analysis

Now get grand means.

mbs <- d %>%
  group_by(TrialType, Age, age_grp, Subid) %>%
  summarise(Correct = mean(Correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(TrialType) %>%
  do(data.frame(rbind(smean.cl.boot(.$Correct))))

ggplot(mbs, aes(x = TrialType, y = Correct, group = TrialType)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_pointrange(col = "red", data = ms, 
                  aes(y = Mean, ymin = Lower, ymax = Upper)) + 
  geom_hline(lty = 2, yintercept = .5)

Check for developmental trends. First continuous.

ggplot(mbs, aes(x = Age, y = Correct, col = TrialType)) + 
  geom_jitter(width = .05, height = .05) + 
  geom_smooth(span = 1) + 
  geom_hline(lty = 2, yintercept = .5) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age Group (years)") + 
  ylim(0,1) + 
  scale_color_ptol()

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

## `geom_smooth()` using method = 'loess'

## Warning: Removed 24 rows containing non-finite values (stat_smooth).

## Warning: Removed 133 rows containing missing values (geom_point).

Now discrete.

ms <- mbs %>%
  group_by(TrialType, age_grp) %>%
  do(data.frame(rbind(smean.cl.boot(.$Correct)))) 

ggplot(filter(mbs, age_grp != "exclude"),
       aes(x = age_grp, y = Correct, col = TrialType)) + 
  # geom_jitter(width = .05, height = .05, alpha = .3) + 
  geom_pointrange(data = filter(ms, age_grp != "exclude"), 
                  aes(y = Mean, ymin = Lower, ymax = Upper), 
                  position = position_dodge(width = .05)) + 
  geom_line(data = filter(ms, age_grp != "exclude"), 
            aes(y = Mean, group = TrialType)) + 
  geom_hline(lty = 2, yintercept = .5) + 
  scale_color_ptol(name = "Trial Type" ) + 
  ylab("Proportion Correct") + 
  xlab("Age Group (years)") + 
  ylim(0,1) + 
  theme(legend.position = "bottom")

Stats

Model

Maximal random effects structure.

d$TrialType <- fct_relevel(d$TrialType, "Pragmatic Inference")
bayes_mod <- brm(as.numeric(Correct) ~ TrialType * scale(Age) 
                 + (TrialType | Subid) 
                 + (TrialType * scale(Age) | Stimulus), 
                 family = "bernoulli", 
                 data = filter(d, age_grp != "exclude"))

## Warning: Rows containing NAs were excluded from the model

## Compiling the C++ model

## Start sampling

## 
## SAMPLING FOR MODEL 'bernoulli(logit) brms-model' NOW (CHAIN 1).
## 
## Gradient evaluation took 0.000608 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 6.08 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:    1 / 2000 [  0%]  (Warmup)
## Iteration:  200 / 2000 [ 10%]  (Warmup)
## Iteration:  400 / 2000 [ 20%]  (Warmup)
## Iteration:  600 / 2000 [ 30%]  (Warmup)
## Iteration:  800 / 2000 [ 40%]  (Warmup)
## Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Iteration: 2000 / 2000 [100%]  (Sampling)
## 
##  Elapsed Time: 80.1567 seconds (Warm-up)
##                51.8538 seconds (Sampling)
##                132.011 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'bernoulli(logit) brms-model' NOW (CHAIN 2).
## 
## Gradient evaluation took 0.000634 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 6.34 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:    1 / 2000 [  0%]  (Warmup)
## Iteration:  200 / 2000 [ 10%]  (Warmup)
## Iteration:  400 / 2000 [ 20%]  (Warmup)
## Iteration:  600 / 2000 [ 30%]  (Warmup)
## Iteration:  800 / 2000 [ 40%]  (Warmup)
## Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Iteration: 2000 / 2000 [100%]  (Sampling)
## 
##  Elapsed Time: 86.1899 seconds (Warm-up)
##                102.442 seconds (Sampling)
##                188.632 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'bernoulli(logit) brms-model' NOW (CHAIN 3).
## 
## Gradient evaluation took 0.000429 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 4.29 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:    1 / 2000 [  0%]  (Warmup)
## Iteration:  200 / 2000 [ 10%]  (Warmup)
## Iteration:  400 / 2000 [ 20%]  (Warmup)
## Iteration:  600 / 2000 [ 30%]  (Warmup)
## Iteration:  800 / 2000 [ 40%]  (Warmup)
## Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Iteration: 2000 / 2000 [100%]  (Sampling)
## 
##  Elapsed Time: 79.7076 seconds (Warm-up)
##                98.5403 seconds (Sampling)
##                178.248 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'bernoulli(logit) brms-model' NOW (CHAIN 4).
## 
## Gradient evaluation took 0.000534 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 5.34 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:    1 / 2000 [  0%]  (Warmup)
## Iteration:  200 / 2000 [ 10%]  (Warmup)
## Iteration:  400 / 2000 [ 20%]  (Warmup)
## Iteration:  600 / 2000 [ 30%]  (Warmup)
## Iteration:  800 / 2000 [ 40%]  (Warmup)
## Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Iteration: 2000 / 2000 [100%]  (Sampling)
## 
##  Elapsed Time: 91.4174 seconds (Warm-up)
##                52.5844 seconds (Sampling)
##                144.002 seconds (Total)

## Warning: There were 27 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

## Warning: Examine the pairs() plot to diagnose sampling problems

kable(fixef(bayes_mod), digits = 2)

	Estimate	Est.Error	2.5%ile	97.5%ile
Intercept	0.54	0.50	-0.53	1.54
TrialTypeControlMDouble	3.09	1.91	0.16	7.37
TrialTypeControlMSingle	1.23	3.04	-5.08	8.94
TrialTypeWarmMUp	6.96	3.19	1.44	13.47
scaleAge	0.43	0.43	-0.43	1.30
TrialTypeControlMDouble:scaleAge	0.00	0.83	-1.35	1.96
TrialTypeControlMSingle:scaleAge	1.43	4.50	-6.19	13.91
TrialTypeWarmMUp:scaleAge	-0.32	1.59	-3.19	2.62

Hypothesis tests

Control-single all kids.

ttestBF(filter(mbs,TrialType=="Control-Single")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 544561976 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

Control-single 6-8s and 8-10s:

ttestBF(filter(mbs,TrialType=="Control-Single" & age_grp == "4 - 6")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.51032 ±0.01%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(filter(mbs,TrialType=="Control-Single" & age_grp == "6 - 8")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1.339463 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(filter(mbs,TrialType=="Control-Single" & age_grp == "8 - 10")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 38512820845 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(filter(mbs, TrialType=="Pragmatic Inference")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 4.834431 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

Just 8-10yos

ttestBF(filter(mbs,TrialType=="Pragmatic Inference" & age_grp == "4 - 6")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.3400324 ±0.02%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(filter(mbs,TrialType=="Pragmatic Inference" & age_grp == "8 - 10")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 266.4931 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

ttestBF(filter(mbs,TrialType=="Pragmatic Inference" & age_grp == "8 - 10")$Correct, mu = .5)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 266.4931 ±0%
## 
## Against denominator:
##   Null, mu = 0.5 
## ---
## Bayes factor type: BFoneSample, JZS

Sub analyses

Items

Look at item effects.

mbs <- d %>%
  group_by(TrialType, Stimulus, Subid) %>%
  summarise(Correct = mean(Correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(TrialType, Stimulus) %>%
  do(data.frame(rbind(smean.cl.boot(.$Correct))))

ggplot(mbs, aes(x = Stimulus, y = Correct, col = TrialType)) + 
  geom_pointrange(data = ms, 
                  aes(y = Mean, ymin = Lower, ymax = Upper), 
                  position = position_dodge(width = .05)) + 
  geom_hline(lty = 2, yintercept = .5) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Side bias

Look at side biase effects. Not any huge evidence of side bias, though overall test performance higher when target is on the R.

mbs <- d %>%
  group_by(TrialType, Target, Subid) %>%
  summarise(Correct = mean(Correct, na.rm=TRUE),
            n = n())

ms <- mbs %>%
  group_by(TrialType, Target) %>%
  do(data.frame(rbind(smean.cl.boot(.$Correct))))

ggplot(mbs, aes(x = Target, y = Correct, col = TrialType)) + 
  geom_pointrange(data = ms, 
                  aes(y = Mean, ymin = Lower, ymax = Upper), 
                  position = position_dodge(width = .05)) + 
  geom_hline(lty = 2, yintercept = .5) + 
  scale_color_solarized() + 
  ylab("Proportion correct") + 
  xlab("Age (years)") + 
  ylim(0,1) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Analysis of Amazon Stiller Task Data

Martin Fortier, Danielle Kellier, and Mike Frank

2018-03-14