Set Up

Load Packages

rm(list = ls(all = TRUE))
library(languageR)
suppressPackageStartupMessages(library(lattice))
suppressPackageStartupMessages(library(lme4))
library(plotrix)
suppressPackageStartupMessages(library(irr))
library(plyr)
library(knitr)
library(ggplot2)
suppressPackageStartupMessages(library(reshape))
suppressPackageStartupMessages(library(reshape2))
library(WRS2)
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
## 
##     melanoma
library(Hmisc)
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

Load Helper Functions

SummarySE

This function can be found on the website “Cookbook for R”

http://www.cookbook-r.com/Manipulating_data/Summarizing_data/

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).

  • data: a data frame
  • measurevar: the name of a column that contains the variable to be summarized
  • groupvars: a vector containing the names of the columns that contain grouping variables
  • na.rm: a boolean that indicates whether to ignore NA’s
  • conf.interval: the percent range of the confidence interval (default is 95%)
# Summarise one measure per group: count, mean, standard deviation,
# standard error of the mean and confidence-interval half-width.
#
# Args:
#   data:          a data frame.
#   measurevar:    name (string) of the column to summarise.
#   groupvars:     character vector of grouping column names.
#   na.rm:         if TRUE, NAs are ignored in the count, mean and sd.
#   conf.interval: coverage of the confidence interval (default .95).
#   .drop:         forwarded to plyr::ddply().
#
# Returns:
#   A data frame with the grouping columns, N, the mean (stored in a column
#   named after `measurevar`), sd, se and ci.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  # plyr is reached through its namespace rather than via require():
  # require() only returns FALSE when the package is missing, and an
  # unqualified rename() resolves to whichever attached package defines it
  # last (this file attaches reshape after plyr).

  # Length that can ignore NAs: with na.rm = TRUE they are not counted.
  count_obs <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # For each group's data frame, return a vector with N, mean and sd.
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
    .fun = function(xx, col) {
      c(N    = count_obs(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd   = sd(xx[[col]], na.rm = na.rm))
    },
    measurevar
  )

  # The "mean" column takes the name of the measured variable.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # t multiplier for the interval: e.g. for conf.interval = .95 use the
  # .975 quantile with df = N - 1.
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}

SummarySEwithin

This function can be found on the website “Cookbook for R”

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions

From that website:

Summarizes data, handling within-subjects variables by removing inter-subject variability. It will still work if there are no within-subjects variables. Gives count, un-normed mean, normed mean (with same between-group mean), standard deviation, standard error of the mean, and confidence interval. If there are within-subject variables, calculate adjusted values using method from Morey (2008).

  • data: a data frame
  • measurevar: the name of a column that contains the variable to be summarized
  • betweenvars: a vector containing names of columns that are between-subjects variables
  • withinvars: a vector containing names of columns that are within-subjects variables
  • idvar: the name of a column that identifies each subject (or matched subjects)
  • na.rm: a boolean that indicates whether to ignore NA’s
  • conf.interval: the percent range of the confidence interval (default is 95%)
# Summarise a measure for within-subjects designs: un-normed means plus
# sd/se/ci computed on subject-normed data, with the Morey (2008) correction.
#
# Args:
#   data:          a data frame.
#   measurevar:    name (string) of the column to summarise.
#   betweenvars:   character vector of between-subjects column names.
#   withinvars:    character vector of within-subjects column names.
#   idvar:         name of the column identifying each subject.
#   na.rm:         if TRUE, NAs are ignored.
#   conf.interval: coverage of the confidence interval (default .95).
#   .drop:         forwarded to summarySE() / normDataWithin().
#
# Returns:
#   The merge of the un-normed summary (grouping columns, N, mean) with the
#   normed summary (normed mean, sd, se, ci).
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

  groupvars <- c(betweenvars, withinvars)

  # Ensure that the between and within variables are factors.
  factorvars <- vapply(data[, groupvars, drop=FALSE],
                       FUN=is.factor, FUN.VALUE=logical(1))

  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }

  # Means from the un-normed data.
  datac <- summarySE(data, measurevar, groupvars=groupvars,
                     na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Spread columns are dropped here; they are recomputed from normed data.
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL

  # Norm each subject's data.
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

  # Name of the column that normDataWithin() adds.
  measurevar_n <- paste0(measurevar, "_norm")

  # Collapse the normed data; between and within variables are treated alike.
  ndatac <- summarySE(ndata, measurevar_n, groupvars=groupvars,
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Morey (2008) correction: sqrt(J / (J - 1)), where J is the product of the
  # number of levels of the within-subjects variables.
  nWithinGroups <- prod(vapply(ndatac[, withinvars, drop=FALSE], FUN=nlevels,
                               FUN.VALUE=numeric(1)))
  # With no within variables (or a single within level) J is 1 and the
  # formula divides by zero, turning every sd/se/ci into Inf or NaN. The
  # correction is undefined there, so the normed values are left uncorrected.
  correctionFactor <- if (nWithinGroups > 1) {
    sqrt(nWithinGroups / (nWithinGroups - 1))
  } else {
    1
  }

  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor

  # Combine the un-normed means with the normed results.
  merge(datac, ndatac)
}

normDataWithin

This function is used by the SummarySEWithin function above. It can be found on the website “Cookbook for R”

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions

From that website:

Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.

  • data: a data frame
  • idvar: the name of a column that identifies each subject (or matched subjects)
  • measurevar: the name of a column that contains the variable to be summarized
  • betweenvars: a vector containing names of columns that are between-subjects variables
  • na.rm: a boolean that indicates whether to ignore NA’s
# Norm a measure within subjects: subtract each subject's mean and add back
# the grand mean, so that every subject has the same mean.
#
# Args:
#   data:        a data frame.
#   idvar:       name of the column identifying each subject.
#   measurevar:  name (string) of the column to norm.
#   betweenvars: character vector of between-subjects column names; subject
#                means are computed per idvar x betweenvars combination.
#   na.rm:       if TRUE, NAs are ignored when computing means.
#   .drop:       forwarded to plyr::ddply().
#
# Returns:
#   `data` (after a merge, so row order may change) with an extra column
#   named "<measurevar>_norm".
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
  # ddply() is called through the plyr namespace so the function does not
  # depend on plyr having been attached by some earlier call.
  data.subjMean <- plyr::ddply(data, c(idvar, betweenvars), .drop = .drop,
    .fun = function(xx, col, na.rm) {
      # [[ ]] always yields a plain vector, whatever the data-frame class.
      c(subjMean = mean(xx[[col]], na.rm = na.rm))
    },
    measurevar,
    na.rm
  )

  # Put the subject means alongside the original rows (joined on the shared
  # columns, i.e. idvar and betweenvars).
  data <- merge(data, data.subjMean)

  # Normed value = observation - subject mean + grand mean.
  measureNormedVar <- paste0(measurevar, "_norm")
  data[[measureNormedVar]] <- data[[measurevar]] - data[["subjMean"]] +
    mean(data[[measurevar]], na.rm = na.rm)

  # The helper column is no longer needed.
  data$subjMean <- NULL

  data
}

myCenter

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger's blog.

https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.

From his blog:

  • If the input is a numeric variable, the output is the centered variable.
  • If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.
  • If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).
# Centre a variable around its mean.
#
# Args:
#   x: a numeric vector, a factor, or a data frame / matrix of such columns.
#
# Returns:
#   * numeric vector: x minus its mean (NAs ignored when computing the mean);
#   * factor: the integer level codes minus their mean;
#   * data frame / matrix: a data frame with one centred column per input
#     column, named "c<original name>" ("cV1", "cV2", ... when the input has
#     no column names);
#   * anything else: NULL (invisibly), as before.
myCenter <- function(x) {
  # Two-dimensional input is handled first: a numeric matrix also satisfies
  # is.numeric() and would otherwise be centred on its grand mean instead of
  # column by column.
  if (is.data.frame(x) || is.matrix(x)) {
    n_col <- ncol(x)
    centered <- matrix(NA_real_, nrow = nrow(x), ncol = n_col)

    src_names <- colnames(x)
    if (is.null(src_names)) {
      src_names <- paste0("V", seq_len(n_col))
    }
    # Skipped for zero columns: paste0("c", character(0)) has length 1.
    if (n_col > 0) {
      colnames(centered) <- paste0("c", src_names)
    }

    # seq_len() so that a zero-column input yields an empty result rather
    # than iterating over c(1, 0).
    for (i in seq_len(n_col)) {
      column <- if (is.data.frame(x)) x[[i]] else x[, i]
      result <- myCenter(column)
      if (is.null(result)) {
        stop("myCenter(): column ", i, " is neither numeric nor a factor",
             call. = FALSE)
      }
      centered[, i] <- result
    }
    return(as.data.frame(centered))
  }

  if (is.numeric(x)) {
    return(x - mean(x, na.rm = TRUE))
  }

  if (is.factor(x)) {
    codes <- as.numeric(x)
    return(codes - mean(codes, na.rm = TRUE))
  }

  # Unsupported input type: nothing to centre.
  invisible(NULL)
}

lizCenter

This function provides a wrapper around myCenter allowing you to center a specific list of variables from a data frame.

  • x: data frame
  • listfname: a list of the variables to be centered (e.g. list(variable1,variable2))

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with the column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).

# Add centred copies of selected columns to a data frame.
#
# Args:
#   x:         a data frame.
#   listfname: a list (or character vector) of column names to centre.
#
# Returns:
#   `x` with one extra column per requested name, called "<name>.ct" and
#   holding the result of myCenter() for that column. An existing
#   "<name>.ct" column is overwritten.
lizCenter <- function(x, listfname) {
  # seq_along() so that an empty `listfname` leaves `x` untouched;
  # 1:length() would iterate over c(1, 0).
  for (i in seq_along(listfname)) {
    fname <- as.character(listfname[i])
    x[paste0(fname, ".ct")] <- myCenter(x[fname])
  }

  x
}

lizContrasts

This function can be used to create two centered dummy variables which stand in place of a three-way factor (e.g., semantic consistency). This allows us to inspect each contrast separately, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor.

The function takes a data frame (d), a factor from that data frame (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).

  • d: data frame
  • condition: three-way factor
  • baselevel: name of the level of the factor that is to be used as the baseline for contrasts
# Replace a three-level factor by two centred contrast columns.
#
# Args:
#   d:         a data frame.
#   condition: the three-level factor (or a vector coercible to one), with
#              one value per row of `d`.
#   baselevel: name of the level used as the baseline for both contrasts.
#
# Returns:
#   `d` with two extra numeric columns, "<baselevel>_VS_<level2>" and
#   "<baselevel>_VS_<level3>", holding the centred contrast codes of each
#   row's condition (NA where `condition` is NA).
lizContrasts <- function(d, condition, baselevel) {
  condition <- relevel(factor(condition), baselevel)

  # The two-column coding below is only meaningful for exactly three levels;
  # any other count would silently produce wrong or NA codes.
  if (nlevels(condition) != 3L) {
    stop("lizContrasts() needs a factor with exactly three levels; got ",
         nlevels(condition), call. = FALSE)
  }

  # Centre each contrast column on its own mean. sweep() subtracts column by
  # column instead of relying on vector recycling over the matrix.
  coding <- contrasts(condition)
  centred <- sweep(coding, 2, colMeans(coding))

  # Row of the coding matrix that applies to each observation.
  row_idx <- match(as.character(condition), rownames(centred))

  name1 <- paste(baselevel, rownames(centred)[2], sep = "_VS_")
  name2 <- paste(baselevel, rownames(centred)[3], sep = "_VS_")

  # The codes are written straight into their final columns, so any existing
  # columns called dummy1 / dummy2 in `d` are left alone.
  d[name1] <- unname(centred[row_idx, 1])
  d[name2] <- unname(centred[row_idx, 2])

  d
}

get_coeffs

This function allows us to inspect particular coefficients from the output of an lme model by putting them in a table.

  • x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)
  • list: a list of the names of the coefficients to be extracted (e.g. c(“variable1”, “variable1:variable2”))
# Tabulate selected rows of a fitted model's coefficient table.
#   x:    a fitted model whose summary() has a `coefficients` matrix
#   list: character vector of coefficient (row) names to show
# Returns the kable() of the selected rows, rounded to 3 digits.
get_coeffs <- function(x, list) {
  coef_table <- as.data.frame(summary(x)$coefficients)
  wanted_rows <- coef_table[list, ]
  kable(wanted_rows, digits = 3)
}

Set up Theme for Figures

# Shared ggplot2 theme additions for all figures: grey axis lines, no panel
# background or grid, 12-pt text, bold facet labels on a white strip.
mytheme <- local({
  font_size <- 12
  hidden <- element_blank()
  theme(
    axis.line = element_line(colour = "grey"),
    text = element_text(size = font_size),
    panel.background = hidden,
    panel.grid.major = hidden,
    panel.grid.minor = hidden,
    strip.text.x = element_text(size = font_size, face = "bold"),
    strip.background = element_rect(fill = "white", colour = "white")
  )
})

Load Datasets

# Load the production and AFC data sets from the working directory.
# stringsAsFactors is set explicitly: later steps call levels() on text
# columns of these data, and since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
prod <- read.csv("production.csv", stringsAsFactors = TRUE)
afc <- read.csv("afc.csv", stringsAsFactors = TRUE)

Production Test: Accuracy

# Session and awareness are categorical, so store them as factors.
prod[c("session", "awareness")] <- lapply(prod[c("session", "awareness")], as.factor)

Data Preparation

Inter-Rater Reliability for Coding

Use Cohen’s Kappa (used for nominal/categorical variables)

  • 1 = perfect agreement
  • 0 = random agreement
  • -1 = perfect disagreement

Landis & Koch (1977)

  • 0.0-0.2 = slight agreement
  • 0.21-0.40 = fair agreement
  • 0.41-0.6 = moderate agreement
  • 0.61-0.8 = substantial agreement
  • 0.81-1.0 = perfect/near perfect agreement

Krippendorff (1980)

  • 0.67-0.8 = allows tentative conclusions
  • 0.81 or higher = allows definite conclusions

To compute Cohen’s Kappa we need to compare detHB and detAS columns for trials where there was a single phoneme error

# Compare the two coders' labels as plain strings.
prod$detHB <- as.character(prod$detHB)
prod$detAS <- as.character(prod$detAS)

# Only items originally coded as "other" under the strict
# (fully correct vs. incorrect) criterion were re-coded by both raters.
IRR <- droplevels(subset(prod, det_used_logical == "other"))

# Two-column ratings matrix (HB, AS) -> unweighted Cohen's kappa.
IRR <- cbind(IRR$detHB, IRR$detAS)
kappa2(IRR, weight = "unweighted", sort.levels = FALSE)
##  Cohen's Kappa for 2 Raters (Weights: unweighted)
## 
##  Subjects = 2478 
##    Raters = 2 
##     Kappa = 0.993 
## 
##         z = 65.6 
##   p-value = 0

Remove Trials without a Det1/Det2 Response

First remove trials where the noun produced was incorrect

# Keep only trials on which the noun was produced correctly.
prod1 <- droplevels(prod[with(prod, which(noun_correct == "1")), , drop = FALSE])

Then remove trials in which the participant either did not produce a sentence-final particle or produced something other than one of the two trained particles

# Drop trials with no particle ("none") or an untrained particle ("other").
prod1 <- droplevels(subset(prod1, detHB != "none" & detHB != "other"))

Create Separate Dataframes for Children and Adults

# One data frame per age group.
prodCHILD <- droplevels(prod1[with(prod1, which(age == "Child")), , drop = FALSE])
prodADULT <- droplevels(prod1[with(prod1, which(age == "Adult")), , drop = FALSE])

Trained Nouns

Figure 1

All Conditions

# Trained ("old") nouns only.
PRODOLD <- droplevels(subset(prod1, oldnew == "old"))

# Mean accuracy per participant within each session / consistency /
# type-frequency / age cell.
aggregated.PRODOLD <- aggregate(
  det_correct ~ participant + session_fig + consistency_fig + typefreq + age,
  data = PRODOLD, FUN = mean
)

# Levels default to alphabetical order (fully consistent, inconsistent,
# partially consistent); swap the last two for plotting.
aggregated.PRODOLD[["consistency_fig"]] <- factor(
  aggregated.PRODOLD[["consistency_fig"]],
  levels = levels(aggregated.PRODOLD[["consistency_fig"]])[c(1, 3, 2)]
)
levels(aggregated.PRODOLD$consistency_fig)
## [1] "Fully Consistent"     "Partially Consistent" "Inconsistent"
# Age levels also default to alphabetical order (adult, child); reverse them.
aggregated.PRODOLD[["age"]] <- factor(
  aggregated.PRODOLD[["age"]],
  levels = levels(aggregated.PRODOLD[["age"]])[c(2, 1)]
)
levels(aggregated.PRODOLD$age)
## [1] "Child" "Adult"
# Figure 1 (top): mean proportion correct by semantic consistency and type
# frequency, one panel per age group x session, with bootstrapped error bars
# and a dashed reference line at 0.50.
p <- ggplot(aggregated.PRODOLD, aes(x = consistency_fig, y = det_correct, fill = typefreq)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Type Frequency", labels = c("High", "Low")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age + session_fig) +
  theme_bw() +
  mytheme +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

ggsave("Figure1_top_panel.jpg", plot = p, width = 8, height = 5, dpi = 120)

Partially Consistent Condition Only

Compare exception/majority-particle nouns within the partially consistent language.

# Trained nouns from the partially consistent language only.
prodOLDP <- droplevels(subset(PRODOLD, consistency_fig == "Partially Consistent"))

# Mean accuracy per participant within each session / noun-type /
# type-frequency / age cell.
aggregated.prodOLDP <- aggregate(
  det_correct ~ participant + session_fig + exception + typefreq + age,
  data = prodOLDP, FUN = mean
)

# Level order of the exception variable.
levels(aggregated.prodOLDP$exception)
## [1] "0" "1"
# 0 = majority-particle, 1 = exception

# Age levels default to alphabetical order (adult, child); reverse them.
aggregated.prodOLDP[["age"]] <- factor(
  aggregated.prodOLDP[["age"]],
  levels = levels(aggregated.prodOLDP[["age"]])[c(2, 1)]
)
levels(aggregated.prodOLDP$age)
## [1] "Child" "Adult"
# Figure 1 (bottom): one panel per age group x session, bars split by type
# frequency and filled by noun-type (majority-particle vs. exception), with
# bootstrapped error bars and a dashed reference line at 0.50.
p <- ggplot(aggregated.prodOLDP, aes(x = typefreq, y = det_correct, fill = exception)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Noun-Type", labels = c("Majority-Particle", "Exception")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age + session_fig) +
  # alternative faceting, kept for reference: facet_grid(~ day)
  theme_bw() +
  mytheme
print(p)

ggsave("Figure1_bottom_panel.jpg", plot = p, width = 8, height = 4, dpi = 120)

Statistical Analyses

Children

All Conditions

First run a model in which the fully consistent condition (“consistent”) is set as the baseline for contrasts (i.e., the model compares the fully consistent condition to both the partially consistent condition (“Consistent_VS_Partial”) and the inconsistent condition (“Consistent_VS_Inconsistent”))

# Child data, trained nouns only. lizCenter() adds the centred predictors
# typefreq.ct and session.ct; lizContrasts() adds the two centred contrast
# columns Consistent_VS_Partial and Consistent_VS_Inconsistent.
prodOLDchild <- droplevels(subset(prodCHILD, oldnew=="old"))
prodOLDchild <- lizCenter(prodOLDchild, list("typefreq", "session"))
prodOLDchild <- lizContrasts(prodOLDchild, prodOLDchild$consistency, "Consistent")

# Binomial mixed model of particle accuracy: both consistency contrasts
# crossed with type frequency and session, with a by-participant random
# intercept and random slope for session.
prodOLDchild.lmer1 = glmer(det_correct ~
                          + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct * session.ct
                          + (1 + session.ct|participant),
                          data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Full fixed-effects table, rounded to 3 digits.
kable(summary(prodOLDchild.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.909 0.090 10.132 0.000
Consistent_VS_Partial -0.666 0.219 -3.047 0.002
Consistent_VS_Inconsistent -0.756 0.219 -3.451 0.001
typefreq.ct 0.291 0.176 1.649 0.099
session.ct 1.115 0.106 10.497 0.000
Consistent_VS_Partial:typefreq.ct -0.324 0.435 -0.746 0.456
Consistent_VS_Inconsistent:typefreq.ct 0.037 0.436 0.085 0.933
Consistent_VS_Partial:session.ct -0.898 0.256 -3.515 0.000
Consistent_VS_Inconsistent:session.ct -0.862 0.258 -3.345 0.001
typefreq.ct:session.ct 0.371 0.204 1.819 0.069
Consistent_VS_Partial:typefreq.ct:session.ct -0.616 0.503 -1.225 0.221
Consistent_VS_Inconsistent:typefreq.ct:session.ct 0.359 0.507 0.708 0.479

We also need to compare the partially consistent and inconsistent conditions (“Inconsistent_VS_Partial”). To do this, rerun the model, with the inconsistent condition as the baseline. (The anova function is used to establish that this is the same model; only relevant parts of the output are printed)

# Add contrast columns with "Inconsistent" as the baseline
# (Inconsistent_VS_Consistent and Inconsistent_VS_Partial).
prodOLDchild <- lizContrasts(prodOLDchild, prodOLDchild$consistency, "Inconsistent")

# Same model structure as prodOLDchild.lmer1, using the new contrast columns.
prodOLDchild.lmer2 = glmer(det_correct ~
                           + (Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * typefreq.ct * session.ct
                           + (1 + session.ct|participant),
                           data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Equal Df, logLik and deviance show the two parameterisations fit the
# data identically.
anova(prodOLDchild.lmer1, prodOLDchild.lmer2)
## Data: prodOLDchild
## Models:
## prodOLDchild.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDchild.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.lmer2: det_correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * 
## prodOLDchild.lmer2:     typefreq.ct * session.ct + (1 + session.ct | participant)
##                    Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDchild.lmer1 15 5779.2 5876.7 -2874.6   5749.2             
## prodOLDchild.lmer2 15 5779.2 5876.7 -2874.6   5749.2     0      0
##                    Pr(>Chisq)
## prodOLDchild.lmer1           
## prodOLDchild.lmer2          1
# Only the rows for the inconsistent-vs-partial contrast are new information.
get_coeffs(prodOLDchild.lmer2, c("Inconsistent_VS_Partial", "Inconsistent_VS_Partial:typefreq.ct", "Inconsistent_VS_Partial:session.ct", "Inconsistent_VS_Partial:typefreq.ct:session.ct"))
Estimate Std. Error z value Pr(>|z|)
Inconsistent_VS_Partial 0.090 0.212 0.427 0.669
Inconsistent_VS_Partial:typefreq.ct -0.361 0.424 -0.853 0.394
Inconsistent_VS_Partial:session.ct -0.036 0.239 -0.153 0.879
Inconsistent_VS_Partial:typefreq.ct:session.ct -0.974 0.478 -2.040 0.041

There is an effect of session in the first model. Is performance above chance in both sessions?

# Re-create the "Consistent"-baseline contrast columns.
prodOLDchild <- lizContrasts(prodOLDchild, prodOLDchild$consistency, "Consistent")

# Re-parameterisation of prodOLDchild.lmer1: the overall intercept (- 1) and
# the session.ct main effect are removed and replaced by the factor `session`,
# which yields one intercept per session (coefficients session1 and session4).
prodOLDchild.lmer1a = glmer(det_correct ~
                            - 1 
                            + session
                            + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct * session.ct
                            - session.ct
                            + (1 + session.ct|participant),
                            data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Equal Df, logLik and deviance show this is the same fit as lmer1; with
# Chisq = 0 on 0 Df the printed p-value is not informative.
anova(prodOLDchild.lmer1, prodOLDchild.lmer1a)
## Data: prodOLDchild
## Models:
## prodOLDchild.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDchild.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.lmer1a: det_correct ~ -1 + session + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDchild.lmer1a:     typefreq.ct * session.ct - session.ct + (1 + session.ct | 
## prodOLDchild.lmer1a:     participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDchild.lmer1  15 5779.2 5876.7 -2874.6   5749.2             
## prodOLDchild.lmer1a 15 5779.2 5876.7 -2874.6   5749.2     0      0
##                     Pr(>Chisq)    
## prodOLDchild.lmer1                
## prodOLDchild.lmer1a  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Per-session intercepts; each row tests that session's estimate against 0.
get_coeffs(prodOLDchild.lmer1a, c("session1", "session4"))
Estimate Std. Error z value Pr(>|z|)
session1 0.287 0.065 4.391 0
session4 1.403 0.127 11.077 0

The first model also showed that session interacted with both Consistent_VS_Inconsistent and Consistent_VS_Partial. We break down each of these interactions by running the model again (with “Consistent” as the baseline level for contrasts) but with (i) the main effect of each contrast and (ii) the interaction of each contrast with session removed, replaced with the effect of each contrast for each level of session (the anova function is used to show that this is equivalent to the original model).

# Re-create the "Consistent"-baseline contrast columns.
prodOLDchild <- lizContrasts(prodOLDchild, prodOLDchild$consistency, "Consistent")

# Re-parameterisation of prodOLDchild.lmer1: each contrast's main effect and
# its interaction with session.ct are replaced by contrast:session terms,
# i.e. one estimate of each contrast per level of the factor `session`.
# The remaining terms of the original model are listed explicitly.
prodOLDchild.lmer1b = glmer(det_correct ~
                            + Consistent_VS_Partial:session
                            + Consistent_VS_Inconsistent:session
                            + typefreq.ct * session.ct
                            + (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct
                            + (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct:session.ct
                            + (1 + session.ct|participant),
                            data = prodOLDchild, family=binomial, control=glmerControl(optimizer = "bobyqa"))
# Equal Df, logLik and deviance show this is the same fit as lmer1; with
# Chisq = 0 on 0 Df the printed p-value is not informative.
anova(prodOLDchild.lmer1,prodOLDchild.lmer1b)
## Data: prodOLDchild
## Models:
## prodOLDchild.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDchild.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.lmer1b: det_correct ~ +Consistent_VS_Partial:session + Consistent_VS_Inconsistent:session + 
## prodOLDchild.lmer1b:     typefreq.ct * session.ct + (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct + 
## prodOLDchild.lmer1b:     (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct:session.ct + 
## prodOLDchild.lmer1b:     (1 + session.ct | participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDchild.lmer1  15 5779.2 5876.7 -2874.6   5749.2             
## prodOLDchild.lmer1b 15 5779.2 5876.7 -2874.6   5749.2     0      0
##                     Pr(>Chisq)    
## prodOLDchild.lmer1                
## prodOLDchild.lmer1b  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect of each contrast within each session. Note that the second
# contrast's terms are named with `session` first.
get_coeffs(prodOLDchild.lmer1b, c("Consistent_VS_Partial:session1", "Consistent_VS_Partial:session4", "session1:Consistent_VS_Inconsistent", "session4:Consistent_VS_Inconsistent"))
Estimate Std. Error z value Pr(>|z|)
Consistent_VS_Partial:session1 -0.165 0.159 -1.039 0.299
Consistent_VS_Partial:session4 -1.063 0.307 -3.459 0.001
session1:Consistent_VS_Inconsistent -0.276 0.162 -1.706 0.088
session4:Consistent_VS_Inconsistent -1.138 0.308 -3.696 0.000
Partially Consistent Condition Only

Do children differ in their performance with majority-particle and exception items in the partially consistent condition? To look at this run a new model just looking at the partially consistent condition, with exception (1 = exception noun; 0 = majority-particle noun) as a fixed effect, as well as interactions with the other factors.

# Child data from the partially consistent condition only. lizCenter() is
# re-applied so that typefreq.ct and session.ct are re-computed on this
# subset (overwriting the columns carried over from prodOLDchild), and
# exception.ct is added.
prodOLDchildP <- droplevels(subset(prodOLDchild, consistency == "Partial"))
prodOLDchildP <- lizCenter(prodOLDchildP, list("typefreq", "session", "exception"))

# Binomial mixed model: type frequency x session x exception, with
# by-participant random intercept and random slopes for session, exception
# and their interaction.
prodOLDchildP.lmer = glmer(det_correct ~ 
                          + typefreq.ct * session.ct * exception.ct
                          + (1 + session.ct * exception.ct|participant), 
                          data = prodOLDchildP, family = binomial, control = glmerControl(optimizer = "bobyqa"))

# Rows involving the exception predictor.
get_coeffs(prodOLDchildP.lmer, c("exception.ct", "typefreq.ct:exception.ct", "session.ct:exception.ct", "typefreq.ct:session.ct:exception.ct"))
Estimate Std. Error z value Pr(>|z|)
exception.ct 0.003 0.183 0.014 0.989
typefreq.ct:exception.ct 0.405 0.354 1.145 0.252
session.ct:exception.ct 0.022 0.370 0.058 0.953
typefreq.ct:session.ct:exception.ct -0.102 0.722 -0.141 0.888

There is no evidence that accuracy differs for exception and majority-particle nouns in the child data.

Adults

All Conditions

First run a model in which the fully consistent condition (“consistent”) is set as the baseline for contrasts (i.e., the model compares the fully consistent condition to both the partially consistent condition (“Consistent_VS_Partial”) and the inconsistent condition (“Consistent_VS_Inconsistent”)).

# Adult data, trained nouns only. lizCenter() adds the centred predictors
# typefreq.ct and session.ct; lizContrasts() adds the two centred contrast
# columns Consistent_VS_Partial and Consistent_VS_Inconsistent.
prodOLDadult <- droplevels(subset(prodADULT, oldnew=="old"))
prodOLDadult <- lizCenter(prodOLDadult, list("typefreq", "session"))
prodOLDadult <- lizContrasts(prodOLDadult, prodOLDadult$consistency, "Consistent")

# Binomial mixed model of particle accuracy: both consistency contrasts
# crossed with type frequency and session, with a by-participant random
# intercept and random slope for session.
prodOLDadult.lmer1 = glmer(det_correct ~
                          + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct * session.ct
                          + (1 + session.ct|participant),
                          data = prodOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Full fixed-effects table, rounded to 3 digits.
kable(summary(prodOLDadult.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.843 0.379 10.133 0.000
Consistent_VS_Partial -2.460 0.761 -3.234 0.001
Consistent_VS_Inconsistent -2.170 0.774 -2.803 0.005
typefreq.ct 1.193 0.585 2.038 0.042
session.ct 3.123 0.625 4.997 0.000
Consistent_VS_Partial:typefreq.ct -0.413 1.513 -0.273 0.785
Consistent_VS_Inconsistent:typefreq.ct 0.943 1.554 0.606 0.544
Consistent_VS_Partial:session.ct -0.997 1.101 -0.905 0.365
Consistent_VS_Inconsistent:session.ct 0.412 1.136 0.363 0.717
typefreq.ct:session.ct -0.709 0.799 -0.887 0.375
Consistent_VS_Partial:typefreq.ct:session.ct 2.140 2.120 1.009 0.313
Consistent_VS_Inconsistent:typefreq.ct:session.ct 3.354 2.260 1.484 0.138

We also need to compare the partially consistent and inconsistent conditions (“Inconsistent_VS_Partial”). To do this, rerun the model with the inconsistent condition as the baseline.

# Add contrast columns with "Inconsistent" as the baseline
# (Inconsistent_VS_Consistent and Inconsistent_VS_Partial).
prodOLDadult <- lizContrasts(prodOLDadult, prodOLDadult$consistency, "Inconsistent")

# Same model structure as prodOLDadult.lmer1, using the new contrast columns.
prodOLDadult.lmer2 = glmer(det_correct ~
                          + (Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * typefreq.ct * session.ct
                          + (1 + session.ct|participant),
                          data = prodOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Equal Df, logLik and deviance show the two parameterisations fit the
# data identically.
anova(prodOLDadult.lmer1, prodOLDadult.lmer2)
## Data: prodOLDadult
## Models:
## prodOLDadult.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDadult.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDadult.lmer2: det_correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * 
## prodOLDadult.lmer2:     typefreq.ct * session.ct + (1 + session.ct | participant)
##                    Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDadult.lmer1 15 2082.1 2175.5 -1026.1   2052.1             
## prodOLDadult.lmer2 15 2082.1 2175.5 -1026.1   2052.1     0      0
##                    Pr(>Chisq)
## prodOLDadult.lmer1           
## prodOLDadult.lmer2          1
# Only the rows for the inconsistent-vs-partial contrast are new information.
# The session interaction is requested by its full name
# ("...:session.ct"); the shortened "...:session" only returned this row
# because data-frame row indexing partially matches row names.
get_coeffs(prodOLDadult.lmer2, c("Inconsistent_VS_Partial", "Inconsistent_VS_Partial:typefreq.ct", "Inconsistent_VS_Partial:session.ct", "Inconsistent_VS_Partial:typefreq.ct:session.ct"))
Estimate Std. Error z value Pr(>|z|)
Inconsistent_VS_Partial -0.290 0.667 -0.435 0.663
Inconsistent_VS_Partial:typefreq.ct -1.355 1.323 -1.024 0.306
Inconsistent_VS_Partial:session.ct -1.409 0.859 -1.641 0.101
Inconsistent_VS_Partial:typefreq.ct:session.ct -1.214 1.708 -0.711 0.477

There were main effects of both session and type frequency. Is performance above chance in each session for each level of type frequency?

# Re-create the "Consistent"-baseline contrast columns.
prodOLDadult <- lizContrasts(prodOLDadult, prodOLDadult$consistency, "Consistent")

# Re-parameterisation of prodOLDadult.lmer1: the overall intercept (- 1) and
# the typefreq.ct / session.ct main effects and their interaction are removed
# and replaced by typefreq:session, which yields one intercept per
# type-frequency x session cell.
prodOLDadult.lmer2b = glmer(det_correct ~
                          - 1
                          + typefreq:session
                          + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct * session.ct
                          - typefreq.ct * session.ct
                          + (1 + session.ct|participant),
                          data = prodOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))
# Equal Df, logLik and deviance show this is the same fit as lmer1; with
# Chisq = 0 on 0 Df the printed p-value is not informative.
anova(prodOLDadult.lmer1, prodOLDadult.lmer2b)
## Data: prodOLDadult
## Models:
## prodOLDadult.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDadult.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDadult.lmer2b: det_correct ~ -1 + typefreq:session + (Consistent_VS_Partial + 
## prodOLDadult.lmer2b:     Consistent_VS_Inconsistent) * typefreq.ct * session.ct - 
## prodOLDadult.lmer2b:     typefreq.ct * session.ct + (1 + session.ct | participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDadult.lmer1  15 2082.1 2175.5 -1026.1   2052.1             
## prodOLDadult.lmer2b 15 2082.1 2175.5 -1026.1   2052.1     0      0
##                     Pr(>Chisq)    
## prodOLDadult.lmer1                
## prodOLDadult.lmer2b  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Per-cell intercepts; each row tests that cell's estimate against 0.
get_coeffs(prodOLDadult.lmer2b, c("typefreqHigh:session1", "typefreqHigh:session4", "typefreqLow:session1", "typefreqLow:session4" ))
Estimate Std. Error z value Pr(>|z|)
typefreqHigh:session1 1.470 0.262 5.609 0
typefreqHigh:session4 4.943 0.777 6.361 0
typefreqLow:session1 3.027 0.322 9.385 0
typefreqLow:session4 5.791 0.817 7.092 0

Looking at session only - is performance above chance in each session?

# Re-code prodOLDadult.lmer1 with one coefficient per session: the global
# intercept and the session.ct main effect are removed and the session
# factor is added in their place.
prodOLDadult.lmer2c <- glmer(
  det_correct ~ -1 +
    session +
    (Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct * session.ct -
    session.ct +
    (1 + session.ct | participant),
  data = prodOLDadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(prodOLDadult.lmer1, prodOLDadult.lmer2c)
## Data: prodOLDadult
## Models:
## prodOLDadult.lmer1: det_correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDadult.lmer1:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDadult.lmer2c: det_correct ~ -1 + session + (Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## prodOLDadult.lmer2c:     typefreq.ct * session.ct - session.ct + (1 + session.ct | 
## prodOLDadult.lmer2c:     participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDadult.lmer1  15 2082.1 2175.5 -1026.1   2052.1             
## prodOLDadult.lmer2c 15 2082.1 2175.5 -1026.1   2052.1     0      0
##                     Pr(>Chisq)
## prodOLDadult.lmer1            
## prodOLDadult.lmer2c          1
# Report the per-session coefficients.
get_coeffs(
  prodOLDadult.lmer2c,
  c("session1", "session4")
)
Estimate Std. Error z value Pr(>|z|)
session1 2.240 0.214 10.485 0
session4 5.363 0.654 8.196 0
Partially Consistent Condition Only

Do adults differ in their performance with majority-particle and exception items in the partially consistent condition? To examine this, we run a new model restricted to the partially consistent condition, with exception (1 = exception noun; 0 = majority-particle noun) as a fixed effect, along with its interactions with the other factors.

# Adult trained-noun production data, partially consistent condition only.
prodOLDadultP <- droplevels(
  subset(prodOLDadult, consistency == "Partial")
)
# lizCenter (helper defined elsewhere) supplies the *.ct predictors used below.
prodOLDadultP <- lizCenter(
  prodOLDadultP,
  list("typefreq", "session", "exception")
)

# Full factorial model including noun type (exception), with by-participant
# random slopes for session, exception and their interaction.
prodOLDadultP.lmer <- glmer(
  det_correct ~ +typefreq.ct * session.ct * exception.ct +
    (1 + session.ct * exception.ct | participant),
  data = prodOLDadultP,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Report only the terms that involve noun type.
get_coeffs(
  prodOLDadultP.lmer,
  c(
    "exception.ct",
    "typefreq.ct:exception.ct",
    "session.ct:exception.ct",
    "typefreq.ct:session.ct:exception.ct"
  )
)
Estimate Std. Error z value Pr(>|z|)
exception.ct -0.930 0.660 -1.409 0.159
typefreq.ct:exception.ct 2.340 0.611 3.829 0.000
session.ct:exception.ct 1.714 1.392 1.231 0.218
typefreq.ct:session.ct:exception.ct -2.353 1.742 -1.350 0.177

The main effect of noun-type is non-significant. There is, however, a significant interaction between noun-type and type-frequency: Figure 1 (bottom panel) suggests more errors with exception nouns when there are more majority-particle nouns (i.e., in the high type frequency condition).

# Re-code prodOLDadultP.lmer so that the effect of noun type (exception.ct)
# is estimated separately within each level of typefreq.
prodOLDadultP.lmer.b <- glmer(
  det_correct ~ +typefreq:exception.ct +
    session.ct * typefreq.ct +
    session.ct:exception.ct +
    typefreq.ct:session.ct:exception.ct +
    (1 + session.ct * exception.ct | participant),
  data = prodOLDadultP,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(prodOLDadultP.lmer, prodOLDadultP.lmer.b)
## Data: prodOLDadultP
## Models:
## prodOLDadultP.lmer: det_correct ~ +typefreq.ct * session.ct * exception.ct + (1 + 
## prodOLDadultP.lmer:     session.ct * exception.ct | participant)
## prodOLDadultP.lmer.b: det_correct ~ +typefreq:exception.ct + session.ct * typefreq.ct + 
## prodOLDadultP.lmer.b:     session.ct:exception.ct + typefreq.ct:session.ct:exception.ct + 
## prodOLDadultP.lmer.b:     (1 + session.ct * exception.ct | participant)
##                      Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDadultP.lmer   18 771.78 863.41 -367.89   735.78             
## prodOLDadultP.lmer.b 18 771.78 863.41 -367.89   735.78     0      0
##                      Pr(>Chisq)    
## prodOLDadultP.lmer                 
## prodOLDadultP.lmer.b  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Report the noun-type effect within each type-frequency level.
get_coeffs(
  prodOLDadultP.lmer.b,
  c("typefreqHigh:exception.ct", "typefreqLow:exception.ct")
)
Estimate Std. Error z value Pr(>|z|)
typefreqHigh:exception.ct -2.066 0.699 -2.956 0.003
typefreqLow:exception.ct 0.274 0.757 0.362 0.717

Novel Nouns

Figure 2

# Novel-noun production data; the inconsistent condition is excluded.
PRODNEW <- droplevels(subset(prod1, oldnew == "new" & consistency != "Inconsistent"))

# Aggregate to obtain means per participant/day etc.
aggregated.PRODNEW <- aggregate(
  det_correct ~ participant + session_fig + consistency_fig + typefreq + age,
  data = PRODNEW,
  FUN = mean
)

# Fix the display order of the semantic consistency variable. Only two levels
# remain once the inconsistent condition has been dropped, so they are named
# explicitly. The previous positional index c(1, 3, 2) pointed at a
# non-existent third level and only worked because factor() discards NA
# levels.
aggregated.PRODNEW$consistency_fig <- factor(
  aggregated.PRODNEW$consistency_fig,
  levels = c("Fully Consistent", "Partially Consistent")
)
levels(aggregated.PRODNEW$consistency_fig)
## [1] "Fully Consistent"     "Partially Consistent"
# Factor levels default to alphabetical order (Adult, Child); swap the two
# positions so that Child comes first.
aggregated.PRODNEW$age <- factor(
  aggregated.PRODNEW$age,
  levels = levels(aggregated.PRODNEW$age)[c(2, 1)]
)
levels(aggregated.PRODNEW$age)
## [1] "Child" "Adult"
# Figure 2: one panel per age group and session; bars are grouped by semantic
# consistency and filled by type frequency. Bars show means, error bars show
# bootstrapped confidence limits, and the dashed line marks 0.50.
p <- ggplot(aggregated.PRODNEW, aes(x = consistency_fig, y = det_correct, fill = typefreq)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Type Frequency", labels = c("High", "Low")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age + session_fig) +
  theme_bw() +
  mytheme +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

# ggsave() writes the most recently displayed plot.
ggsave("Figure2.jpg", width = 8, height = 6, dpi = 120)

Statistical Analyses

For novel nouns we only include data from the fully consistent and partially consistent conditions. The inconsistent condition cannot be included since there is no “correct” (or majority) particle for semantic category, so no way to score accuracy of particle usage for novel nouns.

Children

# Child novel-noun production data; the inconsistent condition is excluded.
prodNEWchild <- droplevels(
  subset(prod1, age == "Child" & oldnew == "new" & consistency != "Inconsistent")
)
# lizCenter (helper defined elsewhere) supplies the *.ct predictors used below.
prodNEWchild <- lizCenter(
  prodNEWchild,
  list("consistency", "typefreq", "session")
)

# Full factorial model with a by-participant random slope for session.
prodNEWchild.lmer1 <- glmer(
  det_correct ~ +consistency.ct * typefreq.ct * session.ct +
    (1 + session.ct | participant),
  data = prodNEWchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(prodNEWchild.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.360 0.105 3.425 0.001
consistency.ct -0.663 0.210 -3.155 0.002
typefreq.ct -0.135 0.209 -0.644 0.519
session.ct 0.388 0.093 4.146 0.000
consistency.ct:typefreq.ct -0.377 0.419 -0.899 0.369
consistency.ct:session.ct -0.504 0.186 -2.706 0.007
typefreq.ct:session.ct -0.127 0.182 -0.700 0.484
consistency.ct:typefreq.ct:session.ct 0.234 0.365 0.640 0.522

Break down the interaction between semantic consistency and session. Figure 2 suggests that the improvement in performance between sessions is greater in the fully consistent condition.

# Re-code prodNEWchild.lmer1 so that the session effect is estimated
# separately within each consistency condition: the session.ct main effect
# and the session.ct:consistency.ct interaction are replaced by
# session.ct:consistency.
prodNEWchild.lmer2 <- glmer(
  det_correct ~ +session.ct:consistency +
    (consistency.ct * typefreq.ct * session.ct) -
    session.ct -
    session.ct:consistency.ct +
    (1 + session.ct | participant),
  data = prodNEWchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(prodNEWchild.lmer1, prodNEWchild.lmer2)
## Data: prodNEWchild
## Models:
## prodNEWchild.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWchild.lmer1:     session.ct | participant)
## prodNEWchild.lmer2: det_correct ~ +session.ct:consistency + (consistency.ct * typefreq.ct * 
## prodNEWchild.lmer2:     session.ct) - session.ct - session.ct:consistency.ct + (1 + 
## prodNEWchild.lmer2:     session.ct | participant)
##                    Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## prodNEWchild.lmer1 11 4162 4228.8  -2070     4140                        
## prodNEWchild.lmer2 11 4162 4228.8  -2070     4140     0      0          1
# Report the session effect within each consistency condition.
get_coeffs(
  prodNEWchild.lmer2,
  c("session.ct:consistencyConsistent", "session.ct:consistencyPartial")
)
Estimate Std. Error z value Pr(>|z|)
session.ct:consistencyConsistent 0.646 0.141 4.597 0.000
session.ct:consistencyPartial 0.142 0.123 1.149 0.251

Are they above chance in each condition in each session? Re-fit the model removing the intercept, main effects of session and consistency, and session by consistency interaction, adding in an intercept for each session in each condition.

# Re-code prodNEWchild.lmer1 with one coefficient per session-by-consistency
# cell: the global intercept and the session.ct * consistency.ct terms are
# removed and session:consistency is added.
prodNEWchild.lmer3 <- glmer(
  det_correct ~ -1 +
    session:consistency +
    (consistency.ct * typefreq.ct * session.ct) -
    session.ct * consistency.ct +
    (1 + session.ct | participant),
  data = prodNEWchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(prodNEWchild.lmer1, prodNEWchild.lmer3)
## Data: prodNEWchild
## Models:
## prodNEWchild.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWchild.lmer1:     session.ct | participant)
## prodNEWchild.lmer3: det_correct ~ -1 + session:consistency + (consistency.ct * typefreq.ct * 
## prodNEWchild.lmer3:     session.ct) - session.ct * consistency.ct + (1 + session.ct | 
## prodNEWchild.lmer3:     participant)
##                    Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## prodNEWchild.lmer1 11 4162 4228.8  -2070     4140                        
## prodNEWchild.lmer3 11 4162 4228.8  -2070     4140     0      0  < 2.2e-16
##                       
## prodNEWchild.lmer1    
## prodNEWchild.lmer3 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Report the four session-by-consistency cell coefficients.
get_coeffs(
  prodNEWchild.lmer3,
  c(
    "session1:consistencyConsistent", "session4:consistencyConsistent",
    "session1:consistencyPartial", "session4:consistencyPartial"
  )
)
Estimate Std. Error z value Pr(>|z|)
session1:consistencyConsistent 0.345 0.130 2.663 0.008
session4:consistencyConsistent 0.991 0.192 5.154 0.000
session1:consistencyPartial -0.041 0.125 -0.326 0.745
session4:consistencyPartial 0.101 0.181 0.559 0.576

Adults

# Adult novel-noun production data; the inconsistent condition is excluded.
prodNEWadult <- droplevels(
  subset(prod1, age == "Adult" & oldnew == "new" & consistency != "Inconsistent")
)
# lizCenter (helper defined elsewhere) supplies the *.ct predictors used below.
prodNEWadult <- lizCenter(
  prodNEWadult,
  list("consistency", "typefreq", "session")
)

# Full factorial model with a by-participant random slope for session.
prodNEWadult.lmer1 <- glmer(
  det_correct ~ +consistency.ct * typefreq.ct * session.ct +
    (1 + session.ct | participant),
  data = prodNEWadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(prodNEWadult.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.311 0.633 6.805 0.000
consistency.ct -3.443 1.029 -3.345 0.001
typefreq.ct 0.277 1.009 0.275 0.784
session.ct 2.216 1.022 2.167 0.030
consistency.ct:typefreq.ct -1.622 1.980 -0.819 0.413
consistency.ct:session.ct -1.892 1.427 -1.325 0.185
typefreq.ct:session.ct -1.619 1.350 -1.199 0.230
consistency.ct:typefreq.ct:session.ct 3.926 2.591 1.515 0.130

Are they above chance in each condition in each session? Re-fit the model removing the intercept, main effects of session and consistency, and session by consistency interaction, adding in an intercept for each session in each condition.

# Re-code prodNEWadult.lmer1 with one coefficient per session-by-consistency
# cell: the global intercept and the session.ct * consistency.ct terms are
# removed and session:consistency is added.
prodNEWadult.lmer2 <- glmer(
  det_correct ~ -1 +
    session:consistency +
    (consistency.ct * typefreq.ct * session.ct) -
    session.ct * consistency.ct +
    (1 + session.ct | participant),
  data = prodNEWadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(prodNEWadult.lmer1, prodNEWadult.lmer2)
## Data: prodNEWadult
## Models:
## prodNEWadult.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWadult.lmer1:     session.ct | participant)
## prodNEWadult.lmer2: det_correct ~ -1 + session:consistency + (consistency.ct * typefreq.ct * 
## prodNEWadult.lmer2:     session.ct) - session.ct * consistency.ct + (1 + session.ct | 
## prodNEWadult.lmer2:     participant)
##                    Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodNEWadult.lmer1 11 1327.7 1391.5 -652.85   1305.7             
## prodNEWadult.lmer2 11 1327.7 1391.5 -652.85   1305.7     0      0
##                    Pr(>Chisq)
## prodNEWadult.lmer1           
## prodNEWadult.lmer2          1
# Report the four session-by-consistency cell coefficients.
get_coeffs(
  prodNEWadult.lmer2,
  c(
    "session1:consistencyConsistent", "session4:consistencyConsistent",
    "session1:consistencyPartial", "session4:consistencyPartial"
  )
)
Estimate Std. Error z value Pr(>|z|)
session1:consistencyConsistent 4.354 0.833 5.227 0.000
session4:consistencyConsistent 7.491 1.454 5.152 0.000
session1:consistencyPartial 1.896 0.656 2.890 0.004
session4:consistencyPartial 3.141 0.929 3.383 0.001

Two-Alternative Forced Choice (2AFC) Test

# Store exception as a factor; it is used as a discrete fill variable in the
# 2AFC plots below.
afc$exception <- as.factor(afc$exception)

Trained Nouns

Figure 3

All Conditions

# Trained-noun 2AFC data, all conditions.
afcOLD <- droplevels(subset(afc, oldnew == "old"))

# Aggregate to obtain means per participant etc.
aggregated.afcOLD <- aggregate(
  correct ~ participant + consistency_fig + typefreq + age,
  data = afcOLD,
  FUN = mean
)

# Re-order levels of the semantic consistency variable (the default is
# alphabetical): move the third level ahead of the second.
aggregated.afcOLD$consistency_fig <- factor(
  aggregated.afcOLD$consistency_fig,
  levels = levels(aggregated.afcOLD$consistency_fig)[c(1, 3, 2)]
)
levels(aggregated.afcOLD$consistency_fig)
## [1] "Fully Consistent"     "Partially Consistent" "Inconsistent"
# Re-order levels of the age variable (the default is alphabetical) so that
# Child comes before Adult.
aggregated.afcOLD$age <- factor(
  aggregated.afcOLD$age,
  levels = levels(aggregated.afcOLD$age)[c(2, 1)]
)
levels(aggregated.afcOLD$age)
## [1] "Child" "Adult"
# Figure 3 (top panel): one panel per age group; bars are grouped by semantic
# consistency and filled by type frequency. Participants completed the 2AFC
# task only in Session 4, so there is no "session" variable.
p <- ggplot(aggregated.afcOLD, aes(x = consistency_fig, y = correct, fill = typefreq)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Type Frequency", labels = c("High", "Low")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age) +
  theme_bw() +
  mytheme +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

# ggsave() writes the most recently displayed plot.
ggsave("Figure3_top_panel.jpg", width = 8, height = 5, dpi = 120)

Partially Consistent Condition Only

# Trained-noun 2AFC data, partially consistent condition only.
afcOLDP <- droplevels(
  subset(afc, oldnew == "old" & consistency == "Partial")
)

# Aggregate to obtain means per participant etc.
aggregated.afcOLDP <- aggregate(
  correct ~ participant + exception + typefreq + age,
  data = afcOLDP,
  FUN = mean
)

# Re-order levels of the age variable (the default is alphabetical) so that
# Child comes before Adult.
aggregated.afcOLDP$age <- factor(
  aggregated.afcOLDP$age,
  levels = levels(aggregated.afcOLDP$age)[c(2, 1)]
)
levels(aggregated.afcOLDP$age)
## [1] "Child" "Adult"
# Figure 3 (bottom panel): one panel per age group; bars are grouped by type
# frequency and filled by noun type (majority-particle vs. exception).
p <- ggplot(aggregated.afcOLDP, aes(x = typefreq, y = correct, fill = exception)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Noun Type", labels = c("Majority-Particle", "Exception")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age) +
  theme_bw() +
  mytheme
print(p)

# ggsave() writes the most recently displayed plot.
ggsave("Figure3_bottom_panel.jpg", width = 8, height = 4, dpi = 120)

Statistical Analyses

Children

All Conditions

First run a model in which the fully consistent condition (“consistent”) is set as the baseline for contrasts (i.e., the model compares the fully consistent condition to both the partially consistent condition (“Consistent_VS_Partial”) and the inconsistent condition (“Consistent_VS_Inconsistent”)).

# Child trained-noun 2AFC data.
afcOLDchild <- droplevels(
  subset(afc, age == "Child" & oldnew == "old")
)
# lizCenter / lizContrasts (helpers defined elsewhere) supply typefreq.ct and
# the Consistent_VS_* contrast columns used below.
afcOLDchild <- lizCenter(afcOLDchild, list("typefreq"))
afcOLDchild <- lizContrasts(
  afcOLDchild,
  afcOLDchild$consistency,
  "Consistent"
)

# Consistency contrasts (baseline: fully consistent) crossed with type
# frequency; random intercept per participant.
afcOLDchild.lmer1 <- glmer(
  correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * typefreq.ct +
    (1 | participant),
  data = afcOLDchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(afcOLDchild.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.103 0.132 8.326 0.000
Consistent_VS_Partial -0.731 0.316 -2.310 0.021
Consistent_VS_Inconsistent -0.771 0.316 -2.440 0.015
typefreq.ct 0.092 0.253 0.364 0.716
Consistent_VS_Partial:typefreq.ct 0.039 0.629 0.062 0.951
Consistent_VS_Inconsistent:typefreq.ct -0.148 0.630 -0.234 0.815

Also compare the partially consistent and inconsistent conditions

# Rebuild the contrast columns with the inconsistent condition as baseline.
afcOLDchild <- lizContrasts(
  afcOLDchild,
  afcOLDchild$consistency,
  "Inconsistent"
)

# Same model structure as afcOLDchild.lmer1, using the Inconsistent_VS_*
# contrasts.
afcOLDchild.lmer2 <- glmer(
  correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * typefreq.ct +
    (1 | participant),
  data = afcOLDchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the two contrast codings.
anova(afcOLDchild.lmer1, afcOLDchild.lmer2)
## Data: afcOLDchild
## Models:
## afcOLDchild.lmer1: correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) * 
## afcOLDchild.lmer1:     typefreq.ct + (1 | participant)
## afcOLDchild.lmer2: correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) * 
## afcOLDchild.lmer2:     typefreq.ct + (1 | participant)
##                   Df    AIC   BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## afcOLDchild.lmer1  7 835.45 867.5 -410.73   821.45                        
## afcOLDchild.lmer2  7 835.45 867.5 -410.73   821.45     0      0          1
# Report the inconsistent-vs-partial contrast and its interaction with type
# frequency.
get_coeffs(
  afcOLDchild.lmer2,
  c("Inconsistent_VS_Partial", "Inconsistent_VS_Partial:typefreq.ct")
)
Estimate Std. Error z value Pr(>|z|)
Inconsistent_VS_Partial 0.041 0.301 0.135 0.893
Inconsistent_VS_Partial:typefreq.ct 0.186 0.602 0.310 0.757

Performance is significantly higher in the fully consistent condition compared to both the partially consistent and inconsistent conditions. There is no difference between the partially consistent and inconsistent conditions.

Partially Consistent Condition Only

Do children differ in their performance with majority-particle and exception items in the partially consistent condition? To examine this, we run a new model restricted to the partially consistent data, with exception (1 = exception noun; 0 = majority-particle noun) as a fixed effect, along with its interactions with the other factors.

# Child trained-noun 2AFC data, partially consistent condition only.
afcOLDchildP <- droplevels(
  subset(afcOLDchild, consistency == "Partial")
)
# Re-centre within this subset (lizCenter is defined elsewhere).
afcOLDchildP <- lizCenter(
  afcOLDchildP,
  list("typefreq", "exception")
)

# Type frequency crossed with noun type, with a by-participant random slope
# for noun type.
afcOLDchildP.lmer <- glmer(
  correct ~ +typefreq.ct * exception.ct +
    (1 + exception.ct | participant),
  data = afcOLDchildP,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Report only the terms that involve noun type.
get_coeffs(
  afcOLDchildP.lmer,
  c("exception.ct", "typefreq.ct:exception.ct")
)
Estimate Std. Error z value Pr(>|z|)
exception.ct -0.425 0.324 -1.310 0.190
typefreq.ct:exception.ct -0.426 0.637 -0.668 0.504

No evidence of a difference between majority-particle and exception nouns.

Adults

All Conditions

First run a model in which the fully consistent condition (“consistent”) is set as the baseline for contrasts (i.e., the model compares the fully consistent condition to both the partially consistent condition (“Consistent_VS_Partial”) and the inconsistent condition (“Consistent_VS_Inconsistent”)).

It was not possible to fit the full model (correlation of fixed factors = 1); we therefore removed the interaction between semantic consistency and type frequency from both models.

# Adult trained-noun 2AFC data.
afcOLDadult <- droplevels(
  subset(afc, age == "Adult" & oldnew == "old")
)
# lizCenter / lizContrasts (helpers defined elsewhere) supply typefreq.ct and
# the Consistent_VS_* contrast columns used below.
afcOLDadult <- lizCenter(afcOLDadult, list("typefreq"))
afcOLDadult <- lizContrasts(
  afcOLDadult,
  afcOLDadult$consistency,
  "Consistent"
)

# Additive model: consistency contrasts (baseline: fully consistent) plus
# type frequency, without their interaction.
afcOLDadult.lmer1 <- glmer(
  correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) + typefreq.ct +
    (1 | participant),
  data = afcOLDadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(afcOLDadult.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.087 1.533 3.970 0.000
Consistent_VS_Partial -2.816 1.710 -1.647 0.100
Consistent_VS_Inconsistent -1.707 1.692 -1.009 0.313
typefreq.ct 1.087 1.258 0.864 0.388

Also compare the partially consistent and inconsistent conditions

# Rebuild the contrast columns with the inconsistent condition as baseline.
afcOLDadult <- lizContrasts(
  afcOLDadult,
  afcOLDadult$consistency,
  "Inconsistent"
)

# Same additive structure as afcOLDadult.lmer1, using the Inconsistent_VS_*
# contrasts.
afcOLDadult.lmer2 <- glmer(
  correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) + typefreq.ct +
    (1 | participant),
  data = afcOLDadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the two contrast codings.
anova(afcOLDadult.lmer1, afcOLDadult.lmer2)
## Data: afcOLDadult
## Models:
## afcOLDadult.lmer1: correct ~ +(Consistent_VS_Partial + Consistent_VS_Inconsistent) + 
## afcOLDadult.lmer1:     typefreq.ct + (1 | participant)
## afcOLDadult.lmer2: correct ~ +(Inconsistent_VS_Partial + Inconsistent_VS_Consistent) + 
## afcOLDadult.lmer2:     typefreq.ct + (1 | participant)
##                   Df    AIC    BIC  logLik deviance Chisq Chi Df
## afcOLDadult.lmer1  5 199.85 220.72 -94.924   189.85             
## afcOLDadult.lmer2  5 199.85 220.72 -94.924   189.85     0      0
##                   Pr(>Chisq)
## afcOLDadult.lmer1           
## afcOLDadult.lmer2          1
# Report the inconsistent-vs-partial contrast.
get_coeffs(
  afcOLDadult.lmer2,
  c("Inconsistent_VS_Partial")
)
Estimate Std. Error z value Pr(>|z|)
Inconsistent_VS_Partial -1.109 1.389 -0.798 0.425
Partially Consistent Condition Only

Do adults differ in their performance with majority-particle and exception items in the partially consistent condition? To examine this, we run a new model restricted to the partially consistent condition, with exception (1 = exception noun; 0 = majority-particle noun) as a fixed effect, along with its interactions with the other factors.

# Adult trained-noun 2AFC data, partially consistent condition only.
afcOLDadultP <- droplevels(
  subset(afcOLDadult, consistency == "Partial")
)
# Re-centre within this subset (lizCenter is defined elsewhere).
afcOLDadultP <- lizCenter(
  afcOLDadultP,
  list("typefreq", "exception")
)

# Type frequency crossed with noun type, with a by-participant random slope
# for noun type (the random intercept is implicit in this formula).
afcOLDadultP.lmer <- glmer(
  correct ~ +typefreq.ct * exception.ct +
    (exception.ct | participant),
  data = afcOLDadultP,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Report only the terms that involve noun type.
get_coeffs(
  afcOLDadultP.lmer,
  c("exception.ct", "typefreq.ct:exception.ct")
)
Estimate Std. Error z value Pr(>|z|)
exception.ct -0.703 1.617 -0.435 0.664
typefreq.ct:exception.ct 1.104 1.295 0.852 0.394

Novel Nouns

As in the production task, data from the inconsistent condition were not included in the novel nouns analyses

Figure 4

# Novel-noun 2AFC data; the inconsistent condition is excluded.
afcNEW <- droplevels(
  subset(afc, oldnew == "new" & consistency != "Inconsistent")
)

# Aggregate to obtain means per participant etc.
aggregated.afcNEW <- aggregate(
  correct ~ participant + consistency_fig + typefreq + age,
  data = afcNEW,
  FUN = mean
)

# Re-order levels of the age variable (the default is alphabetical) so that
# Child comes before Adult.
aggregated.afcNEW$age <- factor(
  aggregated.afcNEW$age,
  levels = levels(aggregated.afcNEW$age)[c(2, 1)]
)
levels(aggregated.afcNEW$age)
## [1] "Child" "Adult"
# Figure 4: one panel per age group; bars are grouped by semantic consistency
# and filled by type frequency.
p <- ggplot(aggregated.afcNEW, aes(x = consistency_fig, y = correct, fill = typefreq)) +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge", colour = "black", size = .3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = .4, position = position_dodge(.9)) +
  xlab("") +
  ylab("Proportion Correct") +
  scale_fill_manual(values = c("grey", "white"), name = "Type Frequency", labels = c("High", "Low")) +
  geom_hline(aes(yintercept = 0.50), linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ age) +
  theme_bw() +
  mytheme
print(p)

# ggsave() writes the most recently displayed plot.
ggsave("Figure4.jpg", width = 8, height = 6, dpi = 120)

Statistical Analyses

Children

Fully versus Partially Consistent Conditions
# Child novel-noun 2AFC data; the inconsistent condition is excluded.
afcNEWchild <- droplevels(
  subset(afc, age == "Child" & oldnew == "new" & consistency != "Inconsistent")
)
# lizCenter (helper defined elsewhere) supplies the *.ct predictors used below.
afcNEWchild <- lizCenter(
  afcNEWchild,
  list("consistency", "typefreq")
)

# Consistency crossed with type frequency; random intercept per participant.
afcNEWchild.lmer1 <- glmer(
  correct ~ +consistency.ct * typefreq.ct +
    (1 | participant),
  data = afcNEWchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(afcNEWchild.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.676 0.194 3.484 0.000
consistency.ct -1.252 0.386 -3.241 0.001
typefreq.ct -0.381 0.380 -1.004 0.315
consistency.ct:typefreq.ct 0.366 0.758 0.482 0.630
Is Performance Above Chance?
# Re-code afcNEWchild.lmer1 with one coefficient per consistency condition:
# the global intercept and the consistency.ct main effect are removed and the
# consistency factor is added.
afcNEWchild.lmer2 <- glmer(
  correct ~ +consistency +
    consistency.ct * typefreq.ct -
    1 -
    consistency.ct +
    (1 | participant),
  data = afcNEWchild,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(afcNEWchild.lmer1, afcNEWchild.lmer2)
## Data: afcNEWchild
## Models:
## afcNEWchild.lmer1: correct ~ +consistency.ct * typefreq.ct + (1 | participant)
## afcNEWchild.lmer2: correct ~ +consistency + consistency.ct * typefreq.ct - 1 - consistency.ct + 
## afcNEWchild.lmer2:     (1 | participant)
##                   Df    AIC    BIC  logLik deviance Chisq Chi Df
## afcNEWchild.lmer1  5 585.05 605.92 -287.52   575.05             
## afcNEWchild.lmer2  5 585.05 605.92 -287.52   575.05     0      0
##                   Pr(>Chisq)    
## afcNEWchild.lmer1               
## afcNEWchild.lmer2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Report the per-condition coefficients.
get_coeffs(
  afcNEWchild.lmer2,
  c("consistencyConsistent", "consistencyPartial")
)
Estimate Std. Error z value Pr(>|z|)
consistencyConsistent 1.302 0.288 4.526 0.000
consistencyPartial 0.050 0.259 0.195 0.846

Adults

Fully versus Partially Consistent Condition

It was not possible to fit the full model (correlation of fixed factors = 1); therefore, as with trained nouns, we removed the interaction between semantic consistency and type frequency.

# Adult novel-noun 2AFC data; the inconsistent condition is excluded.
afcNEWadult <- droplevels(
  subset(afc, age == "Adult" & oldnew == "new" & consistency != "Inconsistent")
)
# lizCenter (helper defined elsewhere) supplies the *.ct predictors used below.
afcNEWadult <- lizCenter(
  afcNEWadult,
  list("consistency", "typefreq")
)

# Additive model: consistency plus type frequency, without their interaction.
afcNEWadult.lmer1 <- glmer(
  correct ~ +consistency.ct + typefreq.ct +
    (1 | participant),
  data = afcNEWadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(afcNEWadult.lmer1)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.566 0.979 4.664 0.000
consistency.ct -3.918 1.394 -2.810 0.005
typefreq.ct 0.819 1.198 0.684 0.494
Is Performance Above Chance?
# Re-code afcNEWadult.lmer1 with one coefficient per consistency condition:
# the global intercept and the consistency.ct main effect are removed and the
# consistency factor is added.
afcNEWadult.lmer2 <- glmer(
  correct ~ +consistency +
    consistency.ct + typefreq.ct -
    1 -
    consistency.ct +
    (1 | participant),
  data = afcNEWadult,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Compare the re-coded fit with the original parameterisation.
anova(afcNEWadult.lmer1, afcNEWadult.lmer2)
## Data: afcNEWadult
## Models:
## afcNEWadult.lmer1: correct ~ +consistency.ct + typefreq.ct + (1 | participant)
## afcNEWadult.lmer2: correct ~ +consistency + consistency.ct + typefreq.ct - 1 - consistency.ct + 
## afcNEWadult.lmer2:     (1 | participant)
##                   Df    AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## afcNEWadult.lmer1  4 174.12 189.19 -83.06   166.12                        
## afcNEWadult.lmer2  4 174.12 189.19 -83.06   166.12     0      0  < 2.2e-16
##                      
## afcNEWadult.lmer1    
## afcNEWadult.lmer2 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Report the per-condition coefficients.
get_coeffs(
  afcNEWadult.lmer2,
  c("consistencyConsistent", "consistencyPartial")
)
Estimate Std. Error z value Pr(>|z|)
consistencyConsistent 6.525 1.469 4.441 0.000
consistencyPartial 2.607 0.855 3.051 0.002

Awareness

Data Preparation

We need a column coding whether the participant is aware/unaware of the semantic conditioning. This is coded in the column “awareness”. However, this coding is not meaningful for the inconsistent condition, as there are no semantic patterns in the data for that condition. We therefore re-code it as “na”.

# Build awareness2 in both data sets: a copy of awareness in which every row
# from the inconsistent condition is set to the label "na", stored as a
# factor.
prod1$awareness2 <- factor(replace(
  as.character(prod1$awareness),
  prod1$consistency == "Inconsistent",
  "na"
))

afc$awareness2 <- factor(replace(
  as.character(afc$awareness),
  afc$consistency == "Inconsistent",
  "na"
))

Table 4 Data

Means: Production Test

# Mean production accuracy per participant within each design cell.
aggregated.prod <- aggregate(
  det_correct ~ participant + consistency + age + oldnew + awareness2 +
    typefreq + session + exception,
  data = prod1,
  FUN = mean
)

# Cell means with within-subject summary statistics (session and exception
# are the within variables).
# NOTE(review): no idvar is passed here; confirm that summarySEwithin
# (defined elsewhere) normalises per participant as intended.
aggregated.prod.M <- summarySEwithin(
  aggregated.prod,
  measurevar = "det_correct",
  betweenvars = c("oldnew", "age", "awareness2", "consistency", "typefreq"),
  withinvars = c("session", "exception"),
  na.rm = FALSE,
  conf.interval = .95
)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced

## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
# Render the summary table rounded to three decimal places.
kable(
  aggregated.prod.M,
  digits = 3
)
oldnew age awareness2 consistency typefreq session exception N det_correct det_correct_norm sd se ci
new Adult 0 Consistent High 1 na 1 0.483 0.686 NA NA NA
new Adult 0 Consistent High 4 na 1 0.469 0.672 NA NA NaN
new Adult 0 Partial High 1 0 5 0.549 0.688 0.376 0.168 0.467
new Adult 0 Partial High 4 0 5 0.531 0.670 0.189 0.085 0.235
new Adult 0 Partial Low 1 0 5 0.547 0.638 0.227 0.102 0.282
new Adult 0 Partial Low 4 0 5 0.629 0.720 0.104 0.047 0.129
new Adult 1 Consistent High 1 na 9 0.760 0.559 0.418 0.139 0.321
new Adult 1 Consistent High 4 na 9 1.000 0.799 0.000 0.000 0.000
new Adult 1 Consistent Low 1 na 10 0.981 0.674 0.043 0.014 0.031
new Adult 1 Consistent Low 4 na 10 0.991 0.684 0.023 0.007 0.017
new Adult 1 Partial High 1 0 5 0.908 0.633 0.160 0.071 0.198
new Adult 1 Partial High 4 0 5 1.000 0.725 0.000 0.000 0.000
new Adult 1 Partial Low 1 0 5 0.866 0.621 0.106 0.047 0.132
new Adult 1 Partial Low 4 0 5 0.981 0.736 0.046 0.021 0.057
new Adult na Inconsistent High 1 na 10 0.304 0.626 0.301 0.095 0.215
new Adult na Inconsistent High 4 na 10 0.410 0.732 0.196 0.062 0.140
new Adult na Inconsistent Low 1 na 10 0.433 0.629 0.254 0.080 0.181
new Adult na Inconsistent Low 4 na 10 0.534 0.729 0.192 0.061 0.137
new Child 0 Consistent High 1 na 7 0.477 0.660 0.057 0.022 0.053
new Child 0 Consistent High 4 na 7 0.514 0.698 0.042 0.016 0.039
new Child 0 Consistent Low 1 na 10 0.497 0.659 0.168 0.053 0.120
new Child 0 Consistent Low 4 na 9 0.539 0.701 0.191 0.064 0.147
new Child 0 Partial High 1 0 11 0.476 0.662 0.113 0.034 0.076
new Child 0 Partial High 4 0 13 0.508 0.694 0.087 0.024 0.053
new Child 0 Partial Low 1 0 15 0.445 0.658 0.104 0.027 0.057
new Child 0 Partial Low 4 0 15 0.486 0.700 0.123 0.032 0.068
new Child 1 Consistent High 1 na 8 0.635 0.570 0.200 0.071 0.167
new Child 1 Consistent High 4 na 8 0.854 0.788 0.128 0.045 0.107
new Child 1 Consistent Low 1 na 4 0.766 0.594 0.209 0.105 0.333
new Child 1 Consistent Low 4 na 5 0.920 0.747 0.127 0.057 0.157
new Child 1 Partial High 1 0 2 0.716 0.630 0.334 0.236 3.000
new Child 1 Partial High 4 0 2 0.814 0.728 0.192 0.136 1.725
new Child na Inconsistent High 1 na 13 0.521 0.670 0.110 0.030 0.066
new Child na Inconsistent High 4 na 15 0.537 0.686 0.127 0.033 0.070
new Child na Inconsistent Low 1 na 12 0.570 0.730 0.171 0.049 0.108
new Child na Inconsistent Low 4 na 15 0.477 0.638 0.062 0.016 0.034
old Adult 0 Consistent High 1 na 1 0.500 0.585 NA NA NaN
old Adult 0 Consistent High 4 na 1 0.688 0.773 NA NA NaN
old Adult 0 Partial High 1 0 5 0.856 0.821 0.164 0.073 0.203
old Adult 0 Partial High 1 1 5 0.450 0.415 0.439 0.197 0.546
old Adult 0 Partial High 4 0 5 0.850 0.815 0.153 0.069 0.191
old Adult 0 Partial High 4 1 5 0.700 0.665 0.382 0.171 0.475
old Adult 0 Partial Low 1 0 5 0.799 0.677 0.180 0.081 0.224
old Adult 0 Partial Low 1 1 5 0.739 0.618 0.265 0.119 0.329
old Adult 0 Partial Low 4 0 5 0.789 0.668 0.136 0.061 0.168
old Adult 0 Partial Low 4 1 5 0.875 0.753 0.097 0.043 0.120
old Adult 1 Consistent High 1 na 9 0.866 0.614 0.227 0.076 0.175
old Adult 1 Consistent High 4 na 9 0.997 0.744 0.011 0.004 0.009
old Adult 1 Consistent Low 1 na 10 0.985 0.675 0.027 0.009 0.019
old Adult 1 Consistent Low 4 na 10 0.994 0.683 0.014 0.005 0.010
old Adult 1 Partial High 1 0 5 0.882 0.761 0.201 0.090 0.249
old Adult 1 Partial High 1 1 5 0.325 0.204 0.229 0.102 0.285
old Adult 1 Partial High 4 0 5 0.992 0.871 0.020 0.009 0.025
old Adult 1 Partial High 4 1 5 1.000 0.879 0.000 0.000 0.000
old Adult 1 Partial Low 1 0 5 0.717 0.488 0.481 0.215 0.598
old Adult 1 Partial Low 1 1 4 0.917 0.688 0.183 0.091 0.291
old Adult 1 Partial Low 4 0 5 1.000 0.771 0.000 0.000 0.000
old Adult 1 Partial Low 4 1 5 1.000 0.771 0.000 0.000 0.000
old Adult na Inconsistent High 1 na 10 0.619 0.531 0.151 0.048 0.108
old Adult na Inconsistent High 4 na 10 0.916 0.827 0.169 0.053 0.121
old Adult na Inconsistent Low 1 na 10 0.840 0.617 0.161 0.051 0.115
old Adult na Inconsistent Low 4 na 10 0.962 0.740 0.118 0.037 0.085
old Child 0 Consistent High 1 na 7 0.514 0.574 0.064 0.024 0.059
old Child 0 Consistent High 4 na 7 0.725 0.784 0.153 0.058 0.142
old Child 0 Consistent Low 1 na 10 0.536 0.544 0.203 0.064 0.145
old Child 0 Consistent Low 4 na 10 0.806 0.814 0.139 0.044 0.100
old Child 0 Partial High 1 0 13 0.519 0.598 0.124 0.034 0.075
old Child 0 Partial High 1 1 13 0.504 0.583 0.265 0.073 0.160
old Child 0 Partial High 4 0 13 0.688 0.767 0.178 0.049 0.108
old Child 0 Partial High 4 1 13 0.690 0.768 0.245 0.068 0.148
old Child 0 Partial Low 1 0 15 0.574 0.600 0.174 0.045 0.096
old Child 0 Partial Low 1 1 15 0.609 0.635 0.249 0.064 0.138
old Child 0 Partial Low 4 0 15 0.702 0.729 0.194 0.050 0.107
old Child 0 Partial Low 4 1 15 0.726 0.752 0.268 0.069 0.148
old Child 1 Consistent High 1 na 8 0.665 0.559 0.165 0.058 0.138
old Child 1 Consistent High 4 na 8 0.905 0.799 0.099 0.035 0.082
old Child 1 Consistent Low 1 na 5 0.792 0.584 0.272 0.122 0.338
old Child 1 Consistent Low 4 na 5 0.981 0.774 0.046 0.021 0.057
old Child 1 Partial High 1 0 2 0.739 0.788 0.337 0.238 3.026
old Child 1 Partial High 1 1 2 0.250 0.299 0.387 0.274 3.480
old Child 1 Partial High 4 0 2 0.958 1.008 0.065 0.046 0.580
old Child 1 Partial High 4 1 2 0.571 0.621 0.664 0.469 5.965
old Child na Inconsistent High 1 na 15 0.519 0.625 0.114 0.029 0.063
old Child na Inconsistent High 4 na 15 0.628 0.733 0.177 0.046 0.098
old Child na Inconsistent Low 1 na 14 0.583 0.585 0.151 0.040 0.087
old Child na Inconsistent Low 4 na 15 0.765 0.767 0.176 0.046 0.098

Means: 2AFC Test

# 2AFC test: average `correct` within each participant for every combination
# of design factors (this test has no session factor in the formula).
aggregated.afc <- aggregate(
  correct ~ participant + consistency + age + oldnew + awareness2 +
    typefreq + exception,
  data = afc,
  FUN = mean
)

# Summarise the participant means per design cell, treating exception as the
# only within-subject factor. summarySEwithin is a helper defined elsewhere in
# this file. Cells with N = 1 give qt() zero degrees of freedom, hence the
# NaN warnings echoed below.
aggregated.afc.M <- summarySEwithin(
  aggregated.afc,
  measurevar = "correct",
  betweenvars = c("oldnew", "age", "awareness2", "consistency", "typefreq"),
  withinvars = "exception",
  na.rm = FALSE,
  conf.interval = .95
)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced

## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
kable(aggregated.afc.M, digits = 3)
oldnew age awareness2 consistency typefreq exception N correct correct_norm sd se ci
new Adult 0 Consistent High 0 1 0.500 0.722 NA NA NaN
new Adult 0 Partial High 0 5 0.550 0.722 0.202 0.090 0.250
new Adult 0 Partial Low 0 5 0.600 0.722 0.316 0.141 0.393
new Adult 1 Consistent High 0 9 1.000 0.722 0.000 0.000 0.000
new Adult 1 Consistent Low 0 10 1.000 0.722 0.000 0.000 0.000
new Adult 1 Partial High 0 5 1.000 0.722 0.000 0.000 0.000
new Adult 1 Partial Low 0 5 1.000 0.722 0.000 0.000 0.000
new Adult na Inconsistent High 0 10 0.450 0.722 0.325 0.103 0.232
new Adult na Inconsistent Low 0 10 0.462 0.722 0.312 0.099 0.223
new Child 0 Consistent High 0 7 0.536 0.722 0.318 0.120 0.294
new Child 0 Consistent Low 0 10 0.512 0.722 0.328 0.104 0.234
new Child 0 Partial High 0 13 0.452 0.722 0.266 0.074 0.160
new Child 0 Partial Low 0 15 0.492 0.722 0.324 0.084 0.179
new Child 1 Consistent High 0 8 0.969 0.722 0.082 0.029 0.068
new Child 1 Consistent Low 0 5 1.000 0.722 0.000 0.000 0.000
new Child 1 Partial High 0 2 1.000 0.722 0.000 0.000 0.000
new Child na Inconsistent High 0 15 0.450 0.722 0.274 0.071 0.152
new Child na Inconsistent Low 0 15 0.567 0.722 0.175 0.045 0.097
old Adult 0 Consistent High 0 1 0.250 0.722 NA NA NaN
old Adult 0 Partial High 0 5 0.800 0.822 0.387 0.173 0.481
old Adult 0 Partial High 1 5 0.600 0.622 0.592 0.265 0.735
old Adult 0 Partial Low 0 5 0.733 0.738 0.316 0.141 0.393
old Adult 0 Partial Low 1 5 0.700 0.705 0.387 0.173 0.481
old Adult 1 Consistent High 0 9 1.000 0.722 0.000 0.000 0.000
old Adult 1 Consistent Low 0 10 1.000 0.722 0.000 0.000 0.000
old Adult 1 Partial High 0 5 1.000 0.722 0.000 0.000 0.000
old Adult 1 Partial High 1 5 1.000 0.722 0.000 0.000 0.000
old Adult 1 Partial Low 0 5 1.000 0.722 0.000 0.000 0.000
old Adult 1 Partial Low 1 5 1.000 0.722 0.000 0.000 0.000
old Adult na Inconsistent High 0 10 0.912 0.722 0.168 0.053 0.120
old Adult na Inconsistent Low 0 10 0.975 0.722 0.112 0.035 0.080
old Child 0 Consistent High 0 7 0.643 0.722 0.238 0.090 0.220
old Child 0 Consistent Low 0 10 0.725 0.722 0.310 0.098 0.221
old Child 0 Partial High 0 13 0.628 0.709 0.239 0.066 0.144
old Child 0 Partial High 1 13 0.654 0.734 0.531 0.147 0.321
old Child 0 Partial Low 0 15 0.733 0.788 0.293 0.076 0.162
old Child 0 Partial Low 1 15 0.600 0.655 0.293 0.076 0.162
old Child 1 Consistent High 0 8 0.922 0.722 0.188 0.066 0.157
old Child 1 Consistent Low 0 5 0.975 0.722 0.079 0.035 0.098
old Child 1 Partial High 0 2 1.000 0.972 0.000 0.000 0.000
old Child 1 Partial High 1 2 0.500 0.472 1.000 0.707 8.985
old Child na Inconsistent High 0 15 0.675 0.722 0.312 0.081 0.173
old Child na Inconsistent Low 0 15 0.667 0.722 0.388 0.100 0.215

Compare Number of Aware/Unaware Participants (Chi Square)

# Adults: count aware (1) vs. unaware (0) participants in the Consistent and
# Partial conditions. Inconsistent is excluded and droplevels() removes its
# now-empty factor levels so the table is 2 x 2.
# NOTE(review): rows are counted as participants; this presumes the
# oldnew == "new" slice of aggregated.afc holds one row per participant.
adults.aware <- with(
  droplevels(subset(
    aggregated.afc,
    age == "Adult" & consistency != "Inconsistent" & oldnew == "new"
  )),
  table(consistency, awareness2)
)
adults.aware
##             awareness2
## consistency   0  1
##   Consistent  1 19
##   Partial    10 10
# Test whether awareness is independent of condition; for a 2 x 2 table
# chisq.test() applies Yates' continuity correction by default.
chisq.test(adults.aware)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  adults.aware
## X-squared = 8.0251, df = 1, p-value = 0.004613
# Children: count aware (1) vs. unaware (0) participants in the Consistent and
# Partial conditions. Inconsistent is excluded and droplevels() removes its
# now-empty factor levels so the table is 2 x 2.
# NOTE(review): rows are counted as participants; this presumes the
# oldnew == "new" slice of aggregated.afc holds one row per participant.
child.aware <- with(
  droplevels(subset(
    aggregated.afc,
    age == "Child" & consistency != "Inconsistent" & oldnew == "new"
  )),
  table(consistency, awareness2)
)
child.aware
##             awareness2
## consistency   0  1
##   Consistent 17 13
##   Partial    28  2
# Test whether awareness is independent of condition; for a 2 x 2 table
# chisq.test() applies Yates' continuity correction by default.
chisq.test(child.aware)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  child.aware
## X-squared = 8.8889, df = 1, p-value = 0.002869

For Table 4 we also need to know the number of aware/unaware participants, split by type frequency.

# Adults: same participant counts as above, additionally split by type
# frequency. The result is a three-way table with one consistency x typefreq
# slice per awareness2 level.
adults.awareT4 <- with(
  droplevels(subset(
    aggregated.afc,
    age == "Adult" & consistency != "Inconsistent" & oldnew == "new"
  )),
  table(consistency, typefreq, awareness2)
)
adults.awareT4
## , , awareness2 = 0
## 
##             typefreq
## consistency  High Low
##   Consistent    1   0
##   Partial       5   5
## 
## , , awareness2 = 1
## 
##             typefreq
## consistency  High Low
##   Consistent    9  10
##   Partial       5   5
# Children: same participant counts as above, additionally split by type
# frequency. The result is a three-way table with one consistency x typefreq
# slice per awareness2 level.
child.awareT4 <- with(
  droplevels(subset(
    aggregated.afc,
    age == "Child" & consistency != "Inconsistent" & oldnew == "new"
  )),
  table(consistency, typefreq, awareness2)
)
child.awareT4
## , , awareness2 = 0
## 
##             typefreq
## consistency  High Low
##   Consistent    7  10
##   Partial      13  15
## 
## , , awareness2 = 1
## 
##             typefreq
## consistency  High Low
##   Consistent    8   5
##   Partial       2   0

Trained Nouns: Production Test

Children, Fully Consistent Condition

Figure 5

# Production trials on trained ("old") nouns from children only.
# Despite the ".unaware" suffix, no awareness filter is applied here: all
# awareness2 levels are kept.
PRODOLD.unaware <- droplevels(
  subset(prod1, oldnew == "old" & age == "Child")
)

# One mean per participant for each session / consistency / awareness cell
aggregated.PRODOLD.unaware <- aggregate(
  det_correct ~ participant + session_fig + consistency_fig + awareness2,
  data = PRODOLD.unaware,
  FUN = mean
)

# Factor levels default to alphabetical order; swap the 2nd and 3rd so that
# "Partially Consistent" comes before "Inconsistent" on the figure's x axis
aggregated.PRODOLD.unaware$consistency_fig <- factor(
  aggregated.PRODOLD.unaware$consistency_fig,
  levels = levels(aggregated.PRODOLD.unaware$consistency_fig)[c(1, 3, 2)]
)
levels(aggregated.PRODOLD.unaware$consistency_fig)
## [1] "Fully Consistent"     "Partially Consistent" "Inconsistent"
# Figure 5: children's production accuracy on trained nouns.
# One panel per session (facet on session_fig); within a panel, bars are
# grouped by semantic consistency and dodged/filled by awareness2.
# Bars show the mean of the per-participant means; error bars come from
# mean_cl_boot, a bootstrap interval, so their exact ends can vary slightly
# between runs.
# `mytheme` is a theme object defined elsewhere in this file.
p <- ggplot(
  aggregated.PRODOLD.unaware,
  aes(x = consistency_fig, y = det_correct, fill = awareness2)
) +
  stat_summary(
    fun.y = mean, geom = "bar", position = "dodge",
    colour = "black", size = .3
  ) +
  stat_summary(
    fun.data = mean_cl_boot, geom = "errorbar", width = .4,
    position = position_dodge(.9)
  ) +
  xlab("") +
  ylab("Proportion Correct") +
  # Colours/labels are matched to the awareness2 levels in order
  scale_fill_manual(
    values = c("grey", "white", "black"),
    name = "Awareness",
    labels = c("Unaware", "Aware", "NA")
  ) +
  # Dashed reference line at 0.5. The intercept is passed as a fixed parameter
  # rather than inside aes(), so a single line is drawn per panel instead of
  # one overplotted line per data row.
  geom_hline(yintercept = 0.50, linetype = "dashed") +
  # Zoom the y axis to [0, 1] without dropping any data
  coord_cartesian(ylim = c(0, 1)) +
  facet_grid(~ session_fig) +
  theme_bw() +
  mytheme +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(p)

# Name the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave("Figure5.jpg", plot = p, width = 6, height = 6, dpi = 120)

Statistical Analyses: Unaware Participants

Comparisons across noun types, children, production

# Children's production trials on trained ("old") nouns, dropping participants
# coded as aware (awareness2 == 1).
prodOLDchild.unaware <- droplevels(
  subset(prod1, age == "Child" & oldnew == "old" & awareness2 != 1)
)
# lizCenter / lizContrasts are helpers defined elsewhere in this file.
# Presumably they add the centred predictors (typefreq.ct, session.ct) and the
# contrast columns against the "Consistent" baseline that the model formula
# below refers to -- confirm against their definitions.
prodOLDchild.unaware <- lizCenter(
  prodOLDchild.unaware,
  list("typefreq", "session")
)
prodOLDchild.unaware <- lizContrasts(
  prodOLDchild.unaware,
  prodOLDchild.unaware$consistency,
  "Consistent"
)

# Logistic mixed model: both consistency contrasts crossed with type frequency
# and session, with a by-participant random intercept and session slope.
prodOLDchild.unaware.lmer2 <- glmer(
  det_correct ~ +(Consistent_VS_Inconsistent + Consistent_VS_Partial) *
    typefreq.ct * session.ct +
    (1 + session.ct | participant),
  data = prodOLDchild.unaware,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
kable(summary(prodOLDchild.unaware.lmer2)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.693 0.075 9.174 0.000
Consistent_VS_Inconsistent -0.186 0.191 -0.971 0.331
Consistent_VS_Partial -0.159 0.193 -0.823 0.411
typefreq.ct 0.312 0.150 2.076 0.038
session.ct 0.942 0.106 8.917 0.000
Consistent_VS_Inconsistent:typefreq.ct 0.045 0.384 0.118 0.906
Consistent_VS_Partial:typefreq.ct -0.201 0.387 -0.519 0.604
Consistent_VS_Inconsistent:session.ct -0.406 0.267 -1.522 0.128
Consistent_VS_Partial:session.ct -0.542 0.267 -2.026 0.043
typefreq.ct:session.ct 0.389 0.209 1.860 0.063
Consistent_VS_Inconsistent:typefreq.ct:session.ct 0.359 0.533 0.673 0.501
Consistent_VS_Partial:typefreq.ct:session.ct -0.474 0.536 -0.884 0.376

There is a significant interaction between Consistent_VS_Partial and session (β = -0.542, z = -2.026, p = .043).

# Alternative parameterisation of the model above: the main effects of the two
# consistency contrasts and their interactions with session.ct are replaced by
# contrast:session terms, while every other term is kept.
# NOTE(review): the first two terms use `session`, not the centred
# `session.ct`. Presumably `session` is a factor, so each contrast is estimated
# separately within each session -- confirm.
prodOLDchild.unaware.lmer2b <- glmer(
  det_correct ~ +Consistent_VS_Partial:session +
    Consistent_VS_Inconsistent:session +
    (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct +
    (Consistent_VS_Partial + Consistent_VS_Inconsistent):typefreq.ct:session.ct +
    typefreq.ct * session.ct +
    (1 + session.ct | participant),
  data = prodOLDchild.unaware,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
# Compare the two models' fit
anova(prodOLDchild.unaware.lmer2, prodOLDchild.unaware.lmer2b)
## Data: prodOLDchild.unaware
## Models:
## prodOLDchild.unaware.lmer2: det_correct ~ +(Consistent_VS_Inconsistent + Consistent_VS_Partial) * 
## prodOLDchild.unaware.lmer2:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.unaware.lmer2b: det_correct ~ +Consistent_VS_Partial:session + Consistent_VS_Inconsistent:session + 
## prodOLDchild.unaware.lmer2b:     (Consistent_VS_Partia