knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(warning = FALSE)

options(width = 400)

library(knitr)
library(papeR)
library(stringr)
knitr::opts_chunk$set(cache = TRUE)
library(compute.es)
library(metafor)
library(skewt)
library(fitdistrplus)
library(gamlss)
library(gamlss.dist)
library(lme4)
library(ez)
library(jsonlite)
library(ggplot2)
library(gridExtra)
library(dplyr)
library(doBy)
library(sn)
library(bootstrap)
library(lubridate)
library(readr)
## for bootstrapping 95% confidence intervals -- from Mike Frank https://github.com/langcog/KTE/blob/master/mcf.useful.R
theta <- function(x,xdata,na.rm=T) {mean(xdata[x],na.rm=na.rm)}
ci.low <- function(x,na.rm=T) {
  quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.025,na.rm=na.rm)} #  mean(x,na.rm=na.rm) -
ci.high <- function(x,na.rm=T) {
  quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.975,na.rm=na.rm) } #- mean(x,na.rm=na.rm)}
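For reference, a minimal usage sketch of these helpers on toy data (the values are illustrative only, not from the experiment):

# Toy example: bootstrapped 95% CI bounds for the mean of a small accuracy vector
set.seed(1)
toy_acc <- c(0, 1, 1, 0, 1, 1, 0, 1)
ci.low(toy_acc)    # lower bound of the bootstrapped 95% CI on the mean
ci.high(toy_acc)   # upper bound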



read_TD_data <- function(csv, type){
  # Read and combine all CSV files, keeping ID-like columns as character
  word <- bind_rows(lapply(csv, read.csv,
                           colClasses = c("num_siblings" = "character",
                                          "birth_order" = "character",
                                          "subject_id" = "character")))
  # Date of test (DOT) and date of birth (DOB)
  word$DOT <- parse_date_time(word$date_stamp, "%m/%d/%Y")
  word$DOB <- parse_date_time(word$date_of_birth, c("%Y-%m-%d", "%d/%m.%y"))
  # Age in weeks, then binned down to the nearest half year
  word$age_weeks <- difftime(word$DOT, word$DOB, units = "weeks")
  word$age <- as.numeric(floor(word$age_weeks/26)/2)
  word <- subset(word, age > 1 & age <= 9)
  word$age <- ordered(word$age)
  word$Experiment <- type
  word$trial_type <- as.factor(word$trial_type)
  #word <- subset(word, trial_type == "critical" )
  return(word)
}
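As a quick worked example of the half-year binning above: floor(age_weeks/26) counts completed half-years, and dividing by 2 gives age in years rounded down to the nearest 0.5.

# Worked check of the age binning (arithmetic only)
floor(170/26)/2   # 3.0 -- a 170-week-old falls in the 3.0-year bin
floor(182/26)/2   # 3.5 -- 182 weeks is exactly 7 half-years, i.e. the 3.5-year bin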

instr_fam_csv <- dir(path = "./data_followup/",pattern='*.csv$', recursive = T,full.names = T)

ins_fam <- read_TD_data(instr_fam_csv,"Instrument Verb Familiar")

# discard intro trial
ins_fam <- subset(ins_fam, trial_id != "intro")
ins_fam$condition <- as.factor(ifelse(ins_fam$trial_type == "critical","ins_verb_WE","noun_WE"))

# Add Demog Info
demog <- read.csv("ins_verb_WE_demog.csv", header =T)
ins_fam <- ins_fam[ins_fam$subject_id %in% demog$subject_id,]

ins_fam <- left_join(ins_fam, demog, by = "subject_id")
ins_fam$Age_Group <- as.factor(ins_fam$Age_Group)

1 Familiar verbs

In an iPad task, children were shown videos in which Elmo taught them his names for familiar actions (sweeping, hammering, etc.). On control trials, they were also taught names for objects (ball, book, etc.). We measured what they learned about these novel labels.

Noun trials were control trials. Children were taught a name for an object (this thing is a blicket). In the WE and MOR conditions, they were asked to identify the blicket. In the ME condition they were asked to identify the dax.

instr_expt1_csv <- dir(path = "./data_followup/",pattern='*.csv$', recursive = T,full.names = T)

expt1_fam <- read_TD_data(instr_expt1_csv,"Instrument Verb expt1")

# discard intro trial
expt1_fam <- subset(expt1_fam, trial_id != "intro")
expt1_fam <- subset(expt1_fam, testing_loc != "pilot")
#ins_fam$condition <- as.factor(ifelse(ins_fam$trial_type == "critical","ins_verb_WE","noun_WE"))

expt1_fam$expt <- NA
expt1_fam$expt[grep("WE",expt1_fam$trial_id)] <- "WE"
expt1_fam$expt[grep("ME",expt1_fam$trial_id)] <- "ME"
expt1_fam$expt[grep("MOR",expt1_fam$trial_id)] <- "MOR"
expt1_fam$expt <- ordered(expt1_fam$expt, levels = c("WE","MOR","ME"))
expt1_fam$word_class <- "Verb"
expt1_fam$word_class[expt1_fam$trial_type %in% c("training","filler")] <- "Noun"
expt1_fam$word <- str_split_fixed(expt1_fam$trial_id,"_",3)[,1]


# Add Demog Info
demog <- read.csv("ins_verb_WE_demog.csv", header =T)
#setdiff(expt1_fam$subject_id,demog$subject_id)
#setdiff(demog$subject_id,expt1_fam$subject_id)

expt1_fam <- expt1_fam[expt1_fam$subject_id %in% demog$subject_id,]
expt1_fam$condition <- as.factor(ifelse(expt1_fam$trial_type == "critical","ins_verb_WE","noun_WE"))
#ins_fam <- subset(ins_fam, trial_id != "intro")

expt1_fam <- left_join(expt1_fam, demog, by = c("subject_id","expt"))
expt1_fam$Age_Group <- as.factor(expt1_fam$Age_Group)


expt1_fam$known <- NA
for (i in c("hammer","scoop","shovel","rake","brush")){
  expt1_fam[expt1_fam$word == i,]$known <- expt1_fam[expt1_fam$word == i,c(i)]
}

1.1  Graphs

Note that WE = word extension condition (this is wugging, can you find the wug), ME = mutual exclusivity condition (this is wugging, can you find the dax), MOR = morphology word extension condition (this is wugging, can you find the wuger).

expt1_summary <- expt1_fam %>%
  dplyr::group_by(word_class,expt,subject_id,Age_Group) %>%
  dplyr::select(rt,accuracy,word_class,expt,subject_id,Age_Group) %>%
  dplyr::summarise(acc.m = mean(accuracy,na.rm = T),rt.m = mean(rt,na.rm = T)) %>% 
  dplyr::group_by(word_class,expt,Age_Group) %>%
  dplyr::select(acc.m,rt.m,word_class,expt,subject_id,Age_Group) %>%
  dplyr::summarise(acc.mean = mean(acc.m, na.rm = T), acc.sd = sd(acc.m, na.rm = T),
                   rt.mean = mean(rt.m, na.rm = T), rt.sd = sd(rt.m, na.rm = T),
                   acc.low = ci.low(acc.m), acc.high = ci.high(acc.m),
                   rt.low = ci.low(rt.m), rt.high = ci.high(rt.m))

expt1_summary_subj <- expt1_fam %>%
  dplyr::group_by(word_class,expt,subject_id,Age.Test) %>%
  dplyr::select(rt,accuracy,word_class,expt,subject_id,Age.Test) %>%
  dplyr::summarise(acc.m = mean(accuracy,na.rm = T),rt.m = mean(rt,na.rm = T)) 

dodge <- position_dodge(width=0.9)

expt1_summary$expt <- ordered(expt1_summary$expt, 
                              levels = c("WE","MOR","ME"), 
                              labels = c("Word Extension","Morphology","Mutual Exclusivity"))

ggplot(expt1_summary, aes(Age_Group,acc.mean, fill = expt)) +
  geom_bar(stat = "identity",  position = dodge) +
  geom_errorbar(aes(ymax = acc.high, ymin = acc.low), width = 0.25, position = dodge) +
  facet_wrap(~word_class,nrow = 2)+
  labs(fill = "Trial Type") + 
  theme(axis.text.x = element_text(colour = "black", size = 12)) +
  ylab("Proportion of extensions to instrument or labeled noun") +
  xlab("") + 
  ylim(c(0,1)) +
  geom_hline(yintercept = 0.5, size =1, linetype = 2)+
  scale_fill_brewer(palette = "Set1", direction = -1)

ggplot(subset(expt1_summary, word_class == "Verb"), aes(Age_Group,acc.mean, fill = expt)) +
  geom_bar(stat = "identity",  position = dodge) +
  geom_errorbar(data = subset(expt1_summary, word_class == "Verb"),aes(ymax = acc.high, ymin = acc.low), width=0.25, position = dodge) +
 #facet_wrap(~word_class,nrow = 2)+
  labs(fill = "Trial Type") + 
  theme(axis.text.x = element_text(colour = "black", size = 12)) +
  ylab("Proportion of extensions to instrument or labeled noun") +
  xlab("Age") + 
  ylim(c(0,1)) +
  geom_hline(yintercept = 0.5, size =1, linetype = 2)+
  scale_fill_brewer(palette = "Set1", direction = -1)

ggplot(expt1_summary_subj, aes(Age.Test,acc.m, color = expt)) +
  geom_point() +
  stat_smooth() +
  facet_wrap(~word_class,nrow = 2)+
  labs(color = "Trial Type") + 
  theme(axis.text.x = element_text(colour = "black", size = 12)) +
  ylab("Accuracy") +
  xlab("") + 
  ylim(c(0,1)) +
  geom_hline(yintercept = 0.5, size =1, linetype = 2)

1.2 Overall analyses

library(brms)
z<- 1
#simple_age_expt <- summary(brm(accuracy ~ 1 + expt*scale(ge_weeks)+ (1|subject_id), data = subset(expt1_fam, word_class == "Verb"), family = "bernoulli", iter = 500, chains = 4))
#kable(data.frame(simple_age_expt$fixed), digits = 2)

We regress accuracy against age. First we do this for each condition separately (WE, ME, MOR). Then we combine WE and MOR to test whether these two conditions differ. To interpret these regressions, look at the rows headed Fixed Effects; the sketch after the bullet list shows how to convert the intercepts from log-odds to probabilities.

  • For WE, overall accuracy (the intercept) is marginally above chance, and there is a significant effect of age: children get better at this task with age.
  • For ME, overall accuracy (the intercept) is reliably below chance (that’s good: they should be below chance) and there is no effect of age (note that this was only done with 4-year-olds).
  • For MOR, overall accuracy (the intercept) is not significantly different from chance, and there is a significant effect of age.
  • For the comparison between WE and MOR, children get better with age and there are no further differences. Surprisingly, morphology does not greatly aid their interpretation.
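To make "above/below chance" concrete, the fixed-effect intercepts (log-odds) can be converted to probabilities; chance (0.5) corresponds to a logit of 0. A minimal sketch using the estimates reported below:

# Convert the reported intercepts from log-odds to proportions (0.5 = chance)
plogis(0.4634)    # WE:  ~0.61, marginally above chance
plogis(-0.6328)   # ME:  ~0.35, below chance, as expected
plogis(0.2367)    # MOR: ~0.56, not reliably different from chance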
noun_summary <- expt1_fam %>%
  filter(word_class != "Verb") %>%
  group_by(subject_id) %>%
  dplyr::select(accuracy,subject_id) %>%
  dplyr::summarise(noun_accuracy = mean(accuracy))

expt1_fam <- left_join(expt1_fam,noun_summary)

# 
summary(glmer(accuracy ~ scale(Age.Test) +
                (1|subject_id)+
                (1+scale(Age.Test)||word), 
               data = subset(expt1_fam, word_class == "Verb" & expt == "WE"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) + (1 | subject_id) + (1 + scale(Age.Test) ||      word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "WE")
## 
##      AIC      BIC   logLik deviance df.resid 
##    693.3    715.4   -341.6    683.3      612 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1908 -0.4951  0.2330  0.4154  2.1035 
## 
## Random effects:
##  Groups     Name            Variance Std.Dev.
##  subject_id (Intercept)     5.172616 2.27434 
##  word       (Intercept)     0.001019 0.03192 
##  word.1     scale(Age.Test) 0.122233 0.34962 
## Number of obs: 617, groups:  subject_id, 124; word, 5
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       0.4634     0.2480   1.868   0.0617 .
## scale(Age.Test)   0.6676     0.2908   2.296   0.0217 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## scl(Ag.Tst) 0.081
summary(glmer(accuracy ~ scale(Age.Test)  + 
                (1|subject_id)+
                (1+scale(Age.Test)|word), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "ME"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) + (1 | subject_id) + (1 + scale(Age.Test) |      word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "ME")
## 
##      AIC      BIC   logLik deviance df.resid 
##    543.6    567.7   -265.8    531.6      409 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2644 -0.6928 -0.5182  0.9634  1.7756 
## 
## Random effects:
##  Groups     Name            Variance Std.Dev. Corr
##  subject_id (Intercept)     0.740486 0.86051      
##  word       (Intercept)     0.115189 0.33939      
##             scale(Age.Test) 0.002395 0.04894  1.00
## Number of obs: 415, groups:  subject_id, 84; word, 5
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -0.6328     0.2126  -2.976  0.00292 **
## scale(Age.Test)  -0.1363     0.1466  -0.930  0.35246   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## scl(Ag.Tst) 0.127
summary(glmer(accuracy ~ scale(Age.Test)  +
                (1|subject_id)+
                (1|word),  
              data = subset(expt1_fam, word_class == "Verb" & expt == "MOR"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) + (1 | subject_id) + (1 | word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "MOR")
## 
##      AIC      BIC   logLik deviance df.resid 
##    530.6    546.8   -261.3    522.6      424 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9414 -0.6249  0.3343  0.6243  1.8680 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 2.34024  1.5298  
##  word       (Intercept) 0.03381  0.1839  
## Number of obs: 428, groups:  subject_id, 85; word, 5
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)       0.2367     0.2221   1.066  0.28639   
## scale(Age.Test)   0.5451     0.2073   2.630  0.00854 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## scl(Ag.Tst) 0.051
# Have to simplify the model to aid convergence
summary(glmer(accuracy ~ scale(Age.Test)*expt +
                (1|subject_id)+
                (1|word), 
              data = subset(expt1_fam, word_class == "Verb" & expt %in% c("MOR","WE") & !Age_Group %in% c("4","4.5")), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * expt + (1 | subject_id) + (1 | word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt %in% c("MOR", "WE") &      !Age_Group %in% c("4", "4.5"))
## 
##      AIC      BIC   logLik deviance df.resid 
##   1020.0   1048.4   -504.0   1008.0      834 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1888 -0.6037  0.2912  0.5749  2.0217 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 2.69072  1.6403  
##  word       (Intercept) 0.06873  0.2622  
## Number of obs: 840, groups:  subject_id, 168; word, 5
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)             0.205809   0.246446   0.835  0.40366   
## scale(Age.Test)         0.587125   0.225718   2.601  0.00929 **
## exptWE                 -0.009982   0.310122  -0.032  0.97432   
## scale(Age.Test):exptWE  0.051229   0.310523   0.165  0.86896   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) exptWE
## scl(Ag.Tst) -0.020               
## exptWE      -0.613  0.020        
## scl(A.T):WE  0.017 -0.720   0.044

1.3 Condition on whether verb is known

There are no significant effects of whether the verb is known on children’s accuracy in this task.

  • In the WE condition, the regression estimates that children are more accurate when they know the verb, and that this effect increases with age, but neither effect is statistically significant.
  • There are no further effects in the ME or MOR conditions, or in the comparison between WE and MOR.
summary(glmer(accuracy ~ scale(Age.Test)*scale(known)  +
                (1+scale(known)||subject_id)+
                (1+scale(Age.Test)||word), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "WE"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(known) + (1 + scale(known) ||      subject_id) + (1 + scale(Age.Test) || word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "WE")
## 
##      AIC      BIC   logLik deviance df.resid 
##    690.1    725.5   -337.1    674.1      606 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1982 -0.4776  0.2230  0.4402  2.1691 
## 
## Random effects:
##  Groups       Name            Variance  Std.Dev. 
##  subject_id   (Intercept)     5.289e+00 2.2998268
##  subject_id.1 scale(known)    1.162e-08 0.0001078
##  word         (Intercept)     3.521e-03 0.0593413
##  word.1       scale(Age.Test) 1.019e-01 0.3192148
## Number of obs: 614, groups:  subject_id, 124; word, 5
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                    0.3838     0.2551   1.505   0.1324  
## scale(Age.Test)                0.6475     0.2903   2.230   0.0257 *
## scale(known)                   0.2592     0.1724   1.504   0.1326  
## scale(Age.Test):scale(known)   0.2276     0.1520   1.497   0.1344  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(k)
## scl(Ag.Tst)  0.086               
## scale(knwn) -0.105 -0.129        
## scl(A.T):() -0.162 -0.051   0.661
summary(glmer(accuracy ~ scale(Age.Test)*scale(known)  +
                (1+scale(known)||subject_id)+
                (1+scale(Age.Test)||word), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "ME"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(known) + (1 + scale(known) ||      subject_id) + (1 + scale(Age.Test) || word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "ME")
## 
##      AIC      BIC   logLik deviance df.resid 
##    542.0    574.3   -263.0    526.0      407 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8665 -0.6743 -0.5084  0.9663  1.8396 
## 
## Random effects:
##  Groups       Name            Variance Std.Dev.
##  subject_id   (Intercept)     0.69044  0.8309  
##  subject_id.1 scale(known)    0.00000  0.0000  
##  word         (Intercept)     0.09755  0.3123  
##  word.1       scale(Age.Test) 0.00000  0.0000  
## Number of obs: 415, groups:  subject_id, 84; word, 5
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -0.70802    0.20824  -3.400 0.000674 ***
## scale(Age.Test)              -0.12009    0.14918  -0.805 0.420811    
## scale(known)                  0.07646    0.19509   0.392 0.695102    
## scale(Age.Test):scale(known)  0.30091    0.17033   1.767 0.077279 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(k)
## scl(Ag.Tst)  0.048               
## scale(knwn) -0.176 -0.272        
## scl(A.T):() -0.226 -0.169   0.767
summary(glmer(accuracy ~ scale(Age.Test)*scale(known)  +
                (1+scale(known)||subject_id)+
                (1+scale(Age.Test)||word), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "MOR"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(known) + (1 + scale(known) ||      subject_id) + (1 + scale(Age.Test) || word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "MOR")
## 
##      AIC      BIC   logLik deviance df.resid 
##    537.5    570.0   -260.8    521.5      420 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9562 -0.6409  0.3292  0.6341  1.8914 
## 
## Random effects:
##  Groups       Name            Variance  Std.Dev. 
##  subject_id   (Intercept)     2.315e+00 1.521e+00
##  subject_id.1 scale(known)    9.958e-10 3.156e-05
##  word         (Intercept)     3.197e-02 1.788e-01
##  word.1       scale(Age.Test) 3.124e-08 1.767e-04
## Number of obs: 428, groups:  subject_id, 85; word, 5
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                   0.19924    0.22361   0.891   0.3729  
## scale(Age.Test)               0.52350    0.21020   2.490   0.0128 *
## scale(known)                  0.09248    0.14977   0.618   0.5369  
## scale(Age.Test):scale(known)  0.14910    0.14482   1.030   0.3032  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(k)
## scl(Ag.Tst)  0.061               
## scale(knwn) -0.066 -0.180        
## scl(A.T):() -0.163 -0.069   0.443
# Have to simplify the model to aid convergence
summary(glmer(accuracy ~ scale(Age.Test)*expt*scale(known) +
                (1|subject_id)+
                (1+scale(Age.Test)||word), 
              data = subset(expt1_fam, word_class == "Verb" & expt %in% c("MOR","WE") & !Age_Group %in% c("4","4.5")), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * expt * scale(known) + (1 | subject_id) +      (1 + scale(Age.Test) || word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt %in% c("MOR", "WE") &      !Age_Group %in% c("4", "4.5"))
## 
##      AIC      BIC   logLik deviance df.resid 
##   1021.9   1073.9   -499.9    999.9      826 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2068 -0.5962  0.2836  0.5639  2.1280 
## 
## Random effects:
##  Groups     Name            Variance  Std.Dev. 
##  subject_id (Intercept)     2.713e+00 1.6471480
##  word       (Intercept)     6.928e-02 0.2632203
##  word.1     scale(Age.Test) 1.176e-09 0.0000343
## Number of obs: 837, groups:  subject_id, 168; word, 5
## 
## Fixed effects:
##                                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                          0.17417    0.24993   0.697   0.4859  
## scale(Age.Test)                      0.56168    0.22997   2.442   0.0146 *
## exptWE                              -0.02771    0.31655  -0.088   0.9303  
## scale(known)                         0.11163    0.14621   0.764   0.4452  
## scale(Age.Test):exptWE               0.08875    0.31734   0.280   0.7797  
## scale(Age.Test):scale(known)         0.15338    0.15080   1.017   0.3091  
## exptWE:scale(known)                 -0.01898    0.21697  -0.088   0.9303  
## scale(Age.Test):exptWE:scale(known) -0.05194    0.21479  -0.242   0.8089  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) exptWE scl(k) sc(A.T):WE s(A.T):( eWE:()
## scl(Ag.Tst) -0.015                                                 
## exptWE      -0.613  0.016                                          
## scale(knwn) -0.027 -0.168   0.019                                  
## scl(A.T):WE  0.014 -0.717   0.052  0.118                           
## scl(A.T):() -0.139 -0.055   0.110  0.392  0.040                    
## exptWE:sc()  0.019  0.107  -0.081 -0.645 -0.171     -0.264         
## s(A.T):WE:(  0.098  0.037  -0.173 -0.267 -0.074     -0.702    0.485

1.4  Condition on whether children were correct on the noun trials

There are no significant effects of children’s accuracy on the noun trials, once age is accounted for. For some reason, the model with by-word random slopes will not converge for the ME condition, so that model is fit with only a by-subject random intercept.
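One common remedy for such convergence failures (not used in the analyses below, so treat this purely as a sketch) is to switch lme4's optimizer and raise its iteration limit via glmerControl():

# Sketch only: refit the full ME model with the bobyqa optimizer
summary(glmer(accuracy ~ scale(Age.Test)*scale(noun_accuracy) +
                (1|subject_id) +
                (1 + scale(Age.Test)|word),
              data = subset(expt1_fam, word_class == "Verb" & expt == "ME"),
              family = "binomial",
              control = glmerControl(optimizer = "bobyqa",
                                     optCtrl = list(maxfun = 2e5))))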

summary(glmer(accuracy ~ scale(Age.Test)*scale(noun_accuracy)  +
                (1|subject_id)+
                (1+scale(Age.Test)|word), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "WE"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(noun_accuracy) + (1 | subject_id) +      (1 + scale(Age.Test) | word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "WE")
## 
##      AIC      BIC   logLik deviance df.resid 
##    696.8    732.2   -340.4    680.8      609 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1399 -0.4810  0.2369  0.4239  2.1592 
## 
## Random effects:
##  Groups     Name            Variance Std.Dev. Corr
##  subject_id (Intercept)     5.0451   2.2461       
##  word       (Intercept)     0.0000   0.0000       
##             scale(Age.Test) 0.1249   0.3534    NaN
## Number of obs: 617, groups:  subject_id, 124; word, 5
## 
## Fixed effects:
##                                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                            0.9941     0.4342   2.289   0.0221 *
## scale(Age.Test)                        0.9447     0.4197   2.251   0.0244 *
## scale(noun_accuracy)                  -0.9617     0.7572  -1.270   0.2041  
## scale(Age.Test):scale(noun_accuracy)  -0.8467     0.5569  -1.520   0.1284  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(_)
## scl(Ag.Tst)  0.493               
## scl(nn_ccr) -0.760 -0.694        
## sc(A.T):(_) -0.825 -0.560   0.921
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
summary(glmer(accuracy ~ scale(Age.Test)*scale(noun_accuracy)  +
                (1|subject_id), 
              data = subset(expt1_fam, word_class == "Verb" & expt == "ME"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(noun_accuracy) + (1 | subject_id)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "ME")
## 
##      AIC      BIC   logLik deviance df.resid 
##    542.9    563.0   -266.5    532.9      410 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1905 -0.6868 -0.5412  1.0411  1.9072 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.6212   0.7882  
## Number of obs: 415, groups:  subject_id, 84
## 
## Fixed effects:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.7150     0.1724  -4.147 3.37e-05 ***
## scale(Age.Test)                       -0.2431     0.1720  -1.413    0.158    
## scale(noun_accuracy)                  -0.2206     0.1759  -1.254    0.210    
## scale(Age.Test):scale(noun_accuracy)  -0.1761     0.1624  -1.084    0.278    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(_)
## scl(Ag.Tst)  0.047               
## scl(nn_ccr)  0.094  0.590        
## sc(A.T):(_)  0.560 -0.001   0.086
summary(glmer(accuracy ~ scale(Age.Test)*scale(noun_accuracy)  + 
                (1|subject_id)+
                (1+scale(Age.Test)|word),
              data = subset(expt1_fam, word_class == "Verb" & expt == "MOR"), family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: accuracy ~ scale(Age.Test) * scale(noun_accuracy) + (1 | subject_id) +      (1 + scale(Age.Test) | word)
##    Data: subset(expt1_fam, word_class == "Verb" & expt == "MOR")
## 
##      AIC      BIC   logLik deviance df.resid 
##    537.5    570.0   -260.8    521.5      420 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8634 -0.6434  0.3415  0.6000  1.8771 
## 
## Random effects:
##  Groups     Name            Variance Std.Dev. Corr 
##  subject_id (Intercept)     2.35053  1.5331        
##  word       (Intercept)     0.03714  0.1927        
##             scale(Age.Test) 0.02665  0.1632   -1.00
## Number of obs: 428, groups:  subject_id, 85; word, 5
## 
## Fixed effects:
##                                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                           0.26900    0.26284   1.023   0.3061  
## scale(Age.Test)                       0.60261    0.26271   2.294   0.0218 *
## scale(noun_accuracy)                 -0.12820    0.31934  -0.402   0.6881  
## scale(Age.Test):scale(noun_accuracy) -0.06476    0.25574  -0.253   0.8001  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) sc(A.T) scl(_)
## scl(Ag.Tst)  0.046               
## scl(nn_ccr) -0.338 -0.516        
## sc(A.T):(_) -0.523 -0.197   0.644
## convergence code: 0
## Model failed to converge with max|grad| = 0.00139695 (tol = 0.001, component 1)

1.5 Proportion correct by age group, item and condition

Note that in the WE and MOR conditions, correct answers = 1, and in the ME condition, correct answers = 0.
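Purely as an illustration of that coding (not used in the table below), the verb trials could be put on a common "gave the expected answer" scale by flipping accuracy on ME trials:

# Illustrative recoding only: 1 = the expected answer in every condition
expt1_verbs <- subset(expt1_fam, word_class == "Verb")
expt1_verbs$expected <- ifelse(expt1_verbs$expt == "ME",
                               1 - expt1_verbs$accuracy,
                               expt1_verbs$accuracy)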

kable(data.frame(summaryBy(accuracy ~ Age_Group + trial_id, data = subset(expt1_fam, word_class == "Verb"))), digits = 2)
Age_Group trial_id accuracy.mean
2 brush_MOR 0.20
2 brush_WE 0.38
2 hammer_MOR 0.50
2 hammer_WE 0.18
2 rake_MOR 0.60
2 rake_WE 0.68
2 scoop_MOR 0.42
2 scoop_WE 0.27
2 shovel_MOR 0.50
2 shovel_WE 0.48
2.5 brush_MOR 0.38
2.5 brush_WE 0.50
2.5 hammer_MOR 0.38
2.5 hammer_WE 0.55
2.5 rake_MOR 0.43
2.5 rake_WE 0.50
2.5 scoop_MOR 0.43
2.5 scoop_WE 0.47
2.5 shovel_MOR 0.33
2.5 shovel_WE 0.45
3 brush_ME 0.48
3 brush_MOR 0.60
3 brush_WE 0.43
3 hammer_ME 0.29
3 hammer_MOR 0.75
3 hammer_WE 0.52
3 rake_ME 0.57
3 rake_MOR 0.75
3 rake_WE 0.48
3 scoop_ME 0.48
3 scoop_MOR 0.55
3 scoop_WE 0.48
3 shovel_ME 0.52
3 shovel_MOR 0.75
3 shovel_WE 0.43
3.5 brush_ME 0.37
3.5 brush_MOR 0.62
3.5 brush_WE 0.65
3.5 hammer_ME 0.20
3.5 hammer_MOR 0.64
3.5 hammer_WE 0.60
3.5 rake_ME 0.25
3.5 rake_MOR 0.64
3.5 rake_WE 0.85
3.5 scoop_ME 0.50
3.5 scoop_MOR 0.56
3.5 scoop_WE 0.70
3.5 shovel_ME 0.35
3.5 shovel_MOR 0.52
3.5 shovel_WE 0.80
4 brush_ME 0.14
4 brush_WE 0.67
4 hammer_ME 0.19
4 hammer_WE 0.62
4 rake_ME 0.48
4 rake_WE 0.57
4 scoop_ME 0.29
4 scoop_WE 0.57
4 shovel_ME 0.35
4 shovel_WE 0.71
4.5 brush_ME 0.43
4.5 brush_WE 0.70
4.5 hammer_ME 0.23
4.5 hammer_WE 0.70
4.5 rake_ME 0.27
4.5 rake_WE 0.50
4.5 scoop_ME 0.59
4.5 scoop_WE 0.55
4.5 shovel_ME 0.45
4.5 shovel_WE 0.65

2 Experiment 2

This experiment uses unfamiliar objects and instruments, with 20 participants per condition (Word Extension/Mutual Exclusivity) and age group.
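As a quick sanity check on that design, the per-cell participant counts could be tallied once expt2_fam has been assembled below (sketch only; column names as used in this script):

# Sketch: count distinct participants per condition and age group
expt2_fam %>%
  dplyr::distinct(subject_id, expt, Age_Group) %>%
  dplyr::count(expt, Age_Group)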

a=0


# Same reader as above, but without the age restriction
read_TD_data <- function(csv, type){
  word <- bind_rows(lapply(csv, read.csv,
                           colClasses = c("num_siblings" = "character",
                                          "birth_order" = "character",
                                          "subject_id" = "character")))
  word$DOT <- parse_date_time(word$date_stamp, "%m/%d/%Y")
  word$DOB <- parse_date_time(word$date_of_birth, c("%Y-%m-%d", "%d/%m.%y"))
  word$age_weeks <- difftime(word$DOT, word$DOB, units = "weeks")
  word$age <- as.numeric(floor(word$age_weeks/26)/2)
  #word <- subset(word, age > 1 & age <= 9)
  word$age <- ordered(word$age)
  word$Experiment <- type
  word$trial_type <- as.factor(word$trial_type)
  #word <- subset(word, trial_type == "critical" )
  return(word)
}


instr_expt2_csv <- dir(path = "./data_expt2/",pattern='*.csv$', recursive = T,full.names = T)

expt2_fam <- read_TD_data(instr_expt2_csv,"Instrument Verb expt2")


# discard intro trial
expt2_fam <- subset(expt2_fam, trial_id != "intro")
expt2_fam <- subset(expt2_fam, testing_loc != "pilot")
#ins_fam$condition <- as.factor(ifelse(ins_fam$trial_type == "critical","ins_verb_WE","noun_WE"))

expt2_fam$expt <- NA
expt2_fam$expt[grep("WE",expt2_fam$trial_id)] <- "WE"
expt2_fam$expt[grep("ME",expt2_fam$trial_id)] <- "ME"
expt2_fam$expt <- ordered(expt2_fam$expt, levels = c("WE","ME"))
expt2_fam$word_class <- "Verb"
expt2_fam$word_class[expt2_fam$trial_type %in% c("training","filler")] <- "Noun"
expt2_fam$word <- str_split_fixed(expt2_fam$trial_id,"_",3)[,1]


# Add Demog Info
demog <- read.csv("Expt2_demog.csv", header =T)
#setdiff(expt1_fam$subject_id,demog$subject_id)
#setdiff(demog$subject_id,expt1_fam$subject_id)

expt2_fam <- expt2_fam[expt2_fam$subject_id %in% demog$subject_id,]
expt2_fam$condition <- as.factor(ifelse(expt2_fam$trial_type == "critical","ins_verb_WE","noun_WE"))
#ins_fam <- subset(ins_fam, trial_id != "intro")

expt2_fam <- left_join(expt2_fam, demog, by = c("subject_id","expt"))
expt2_fam$Age_Group <- as.factor(expt2_fam$Age_Group)
contrasts(expt2_fam$Age_Group)[1] = -0.5
contrasts(expt2_fam$Age_Group)[2] = 0.5
expt2_fam$expt = factor(expt2_fam$expt)
contrasts(expt2_fam$expt)[1] = -0.5
contrasts(expt2_fam$expt)[2] = 0.5

expt2_summary <- expt2_fam %>%
  dplyr::group_by(word_class,expt,subject_id,Age_Group) %>%
  dplyr::select(rt,accuracy,word_class,expt,subject_id,Age_Group) %>%
  dplyr::summarise(acc.m = mean(accuracy,na.rm = T),rt.m = mean(rt,na.rm = T)) %>% 
  dplyr::group_by(word_class,expt,Age_Group) %>%
  dplyr::select(acc.m,rt.m,word_class,expt,subject_id,Age_Group) %>%
  dplyr::summarise(acc.mean = mean(acc.m, na.rm = T), acc.sd = sd(acc.m, na.rm = T),
                   rt.mean = mean(rt.m, na.rm = T), rt.sd = sd(rt.m, na.rm = T),
                   acc.low = ci.low(acc.m), acc.high = ci.high(acc.m),
                   rt.low = ci.low(rt.m), rt.high = ci.high(rt.m))

expt2_summary_subj <- expt2_fam %>%
  dplyr::group_by(word_class,expt,subject_id,Age.Test) %>%
  dplyr::select(rt,accuracy,word_class,expt,subject_id,Age.Test) %>%
  dplyr::summarise(acc.m = mean(accuracy,na.rm = T),rt.m = mean(rt,na.rm = T)) 

dodge <- position_dodge(width=0.9)

expt2_summary$expt <- ordered(expt2_summary$expt, 
                              levels = c("WE","MOR","ME"), 
                              labels = c("Word Extension","Morphology","Mutual Exclusivity"))

ggplot(expt2_summary, aes(Age_Group,acc.mean, fill = expt)) +
  geom_bar(stat = "identity",  position = dodge) +
  geom_errorbar(aes(ymax = acc.high, ymin = acc.low), width = 0.25, position = dodge) +
  facet_wrap(~word_class,nrow = 2)+
  labs(fill = "Trial Type") + 
  theme(axis.text.x = element_text(colour = "black", size = 12)) +
  ylab("Proportion of extensions to instrument or labeled noun") +
  xlab("") + 
  ylim(c(0,1)) +
  geom_hline(yintercept = 0.5, size =1, linetype = 2)+
  scale_fill_brewer(palette = "Set1", direction = -1)

Statistical analyses show a significant difference between the two conditions. Children are reliably below chance in the Mutual Exclusivity condition, and marginally above chance in the Word Extension condition. There is no interaction with age, but the effect is smaller in the older kids (which I would chalk up to a false negative).

Omnibus Model

Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0243847 0.2032347 -0.1199829 0.9044967
exptWE 1.2809667 0.4136803 3.0965138 0.0019581
Age_Group4 0.2964035 0.4073795 0.7275856 0.4668673
exptWE:Age_Group4 -1.1523481 0.8153518 -1.4133139 0.1575634

Word Extension Condition

Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.8037068 0.4325811 1.8579334 0.0631785
Age_Group4 -0.2816040 0.8294605 -0.3395026 0.7342311

Mutual Exclusivity Condition

Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.553264 0.2062215 -2.682863 0.0072995
Age_Group4 0.692760 0.4087432 1.694854 0.0901031
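The glmer() calls behind these tables are not echoed; judging from the coefficient names, the omnibus model was presumably of roughly this form (a sketch, not the exact code used):

# Presumed form of the omnibus Experiment 2 model (sketch only)
kable(summary(glmer(accuracy ~ expt * Age_Group + (1|subject_id),
                    data = subset(expt2_fam, word_class == "Verb"),
                    family = "binomial"))$coef)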

2.1  Comparison to Experiment 1

Familiar meanings are from Experiment 1; novel meanings are from Experiment 2. There is no significant difference between the experiments, nor any interaction involving experiment.

expt2_sub = subset(expt2_summary, word_class == "Verb")
expt1_sub = subset(expt1_summary, word_class == "Verb" & 
                     Age_Group %in% c("3.5","4") & 
                     expt %in% c("Mutual Exclusivity", "Word Extension"))
expt2_sub$Familiarity = "Novel Meanings"
expt1_sub$Familiarity = "Familiar Meanings"

expt_comparison = rbind(expt1_sub, expt2_sub)


ggplot(expt_comparison, aes(Age_Group,acc.mean, fill = expt)) +
  geom_bar(stat = "identity",  position = dodge) +
  geom_errorbar(aes(ymax = acc.high, ymin = acc.low), width=0.25, position = dodge) +
 facet_grid(.~Familiarity)+
  labs(fill = "Trial Type") + 
  theme(axis.text.x = element_text(colour = "black", size = 12)) +
  ylab("Proportion of extensions to instrument or labeled noun") +
  xlab("Age") + 
  ylim(c(0,1)) +
  geom_hline(yintercept = 0.5, size =1, linetype = 2)+
  scale_fill_brewer(palette = "Set1", direction = -1)

expt2_sub_s = subset(expt2_fam, word_class == "Verb") %>%
  select(accuracy, Age_Group, subject_id, expt)
expt1_sub_s = subset(expt1_fam, word_class == "Verb" & 
                     Age_Group %in% c("3.5","4") & 
                     expt %in% c("ME", "WE")) %>%
  select(accuracy, Age_Group, subject_id, expt)
expt2_sub_s$Familiarity = "Novel Meanings"
expt1_sub_s$Familiarity = "Familiar Meanings"

expt_comparison_s = rbind(expt1_sub_s, expt2_sub_s)

expt_comparison_s$Familiarity = factor(expt_comparison_s$Familiarity)
expt_comparison_s$expt = factor(expt_comparison_s$expt)
expt_comparison_s$Age_Group = factor(expt_comparison_s$Age_Group)
contrasts(expt_comparison_s$Familiarity)[1] = 0.5
contrasts(expt_comparison_s$Familiarity)[2] = -0.5
contrasts(expt_comparison_s$Age_Group)[1] = 0.5
contrasts(expt_comparison_s$Age_Group)[2] = -0.5
contrasts(expt_comparison_s$expt)[1] = 0.5
contrasts(expt_comparison_s$expt)[2] = -0.5

kable(summary(glmer(accuracy ~ expt * Age_Group * Familiarity + (1|subject_id), 
                    data = expt_comparison_s,
                    family = "binomial"))$coef)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0003456 0.1681004 -0.0020558 0.9983597
exptWE -1.8850611 0.3511546 -5.3681801 0.0000001
Age_Group4 0.0624165 0.3359114 0.1858125 0.8525918
FamiliarityNovel Meanings 0.0368701 0.3359201 0.1097585 0.9126009
exptWE:Age_Group4 -0.7335070 0.6723143 -1.0910180 0.2752650
exptWE:FamiliarityNovel Meanings -0.9867095 0.6730142 -1.4661051 0.1426197
Age_Group4:FamiliarityNovel Meanings 0.8024247 0.6738554 1.1907965 0.2337335
exptWE:Age_Group4:FamiliarityNovel Meanings 1.0186230 1.3455564 0.7570274 0.4490334