Load packages and helper functions

Packages.

rm(list=ls())
library(ggplot2)
library(plotrix)
#suppressPackageStartupMessages(library(dplyr))
library(plyr)
#library(bootstrap)
suppressPackageStartupMessages(library(lme4))
library(Kendall)
## Warning: package 'Kendall' was built under R version 3.3.2
library(knitr)
theme_set(theme_bw())
opts_chunk$set(fig.width=8, fig.height=5, 
echo=TRUE, warning=FALSE, message=FALSE, cache=TRUE)

Helper functions.

SummarySE

This function can be found on the website “Cookbook for R”.

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).

data: a data frame.

measurevar: the name of a column that contains the variable to be summarized. groupvars: a vector containing names of columns that contain grouping variables. na.rm: a boolean that indicates whether to ignore NA’s. conf.interval: the percent range of the confidence interval (default is 95%).

# Summarise one column of a data frame per group (adapted from "Cookbook for R").
#
# Args:
#   data:          a data frame.
#   measurevar:    name (string) of the column to be summarised.
#   groupvars:     character vector with the names of the grouping columns;
#                  handed to plyr::ddply() as its splitting variables.
#   na.rm:         if TRUE, NA values are ignored in the count, mean and sd.
#   conf.interval: confidence level used for the CI half-width (default .95).
#   .drop:         passed straight through to plyr::ddply().
#
# Returns:
#   A data frame with one row per group holding the grouping columns, N,
#   the group mean (in a column named after `measurevar`), sd, se and ci
#   (ci is the half-width: se * t quantile with N - 1 degrees of freedom).
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    # Fail loudly when plyr is missing (require() would only return FALSE),
    # and call plyr through its namespace so that a masking `rename`
    # (e.g. from dplyr or the global environment) cannot break the function.
    if (!requireNamespace("plyr", quietly = TRUE)) {
        stop("summarySE() needs the 'plyr' package to be installed",
             call. = FALSE)
    }

    # Version of length() that can skip NA's: if na.rm is TRUE, don't count them
    length2 <- function(x, na.rm = FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # For each group's data frame, return a vector with N, mean, and sd.
    # `measurevar` is forwarded through ddply's `...` as the `col` argument.
    datac <- plyr::ddply(data, groupvars, .drop = .drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm = na.rm),
          mean = mean   (xx[[col]], na.rm = na.rm),
          sd   = sd     (xx[[col]], na.rm = na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column to the name of the measured variable
    datac <- plyr::rename(datac, c("mean" = measurevar))

    # Standard error of the mean
    datac$se <- datac$sd / sqrt(datac$N)

    # Confidence interval multiplier for the standard error:
    # e.g., if conf.interval is .95, use the .975 t quantile with df = N - 1
    ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
    datac$ci <- datac$se * ciMult

    return(datac)
}

myCenter

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger’s blog https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.

From his blog:

-If the input is a numeric variable, the output is the centered variable.

-If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.

-If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).

# Center a variable on its mean.
#
# Args:
#   x: a numeric vector, a factor, or a data frame / matrix.
#
# Returns:
#   - numeric input: x minus its mean (NAs ignored when computing the mean);
#   - factor input:  the integer level codes minus their mean;
#   - data frame:    a data frame of the same dimensions whose columns are the
#                    centered input columns, named "c" + original column name;
#   - anything else: NULL (invisibly), as in the original implementation.
#
# NOTE: is.numeric() is also TRUE for a numeric matrix, so such a matrix is
# handled by the first branch (centered on its grand mean, returned as a
# matrix) and never reaches the column-wise branch below.
myCenter= function(x) {
    if (is.numeric(x)) {
        return(x - mean(x, na.rm = TRUE))
    }
    if (is.factor(x)) {
        level_codes <- as.numeric(x)
        return(level_codes - mean(level_codes, na.rm = TRUE))
    }
    if (is.data.frame(x) || is.matrix(x)) {
        n_col <- ncol(x)
        centered <- matrix(nrow = nrow(x), ncol = n_col)
        # paste() turns a zero-length name vector into "c", which cannot be
        # assigned to a zero-column matrix, so only name when there are columns.
        if (n_col > 0L) {
            colnames(centered) <- paste("c", colnames(x), sep = "")
        }

        # seq_len() (not 1:n_col) so that a zero-column input is a no-op
        for (i in seq_len(n_col)) {
            # [[i]] always yields the column vector of a data frame (also for
            # tibble-like classes where x[, i] stays a data frame)
            column <- if (is.data.frame(x)) x[[i]] else x[, i]
            centered[, i] <- myCenter(column)
        }
        return(as.data.frame(centered))
    }
    invisible(NULL)
}

lizCenter

This function provides a wrapper around myCenter allowing you to center a specific list of variables from a dataframe.

x: a data frame. listfname: a list of the names of the variables to be centered (e.g. list("variable1", "variable2")).

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with each column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).

# Add centered copies of selected columns to a data frame.
#
# Args:
#   x:         a data frame.
#   listfname: a list (or character vector) of column names to center.
#
# Returns:
#   `x` with one extra column per requested name, called "<name>.ct", holding
#   the result of myCenter() for that column. An empty `listfname` returns
#   `x` unchanged.
lizCenter= function(x, listfname) 
{
    # seq_along() rather than 1:length(): an empty list must not iterate
    for (i in seq_along(listfname)) 
    {
        fname <- as.character(listfname[i])
        # x[fname] is a one-column data frame, so myCenter() takes its
        # data-frame branch and returns a one-column data frame
        x[paste(fname, ".ct", sep = "")] <- myCenter(x[fname])
    }

    return(x)
}

lizContrasts

This function can be used to create two centered dummy variables which stand in place of a three-way factor (condition). This allows us to inspect each contrast separately, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor.

The function takes a dataframe (d), a factor from that database (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).

For example, if d is a dataframe with a factor “condition” with three levels “lex_skew”, “lex_noskew” and “mixed”, then lizContrasts(d, d$condition, “lex_noskew”) returns a dataframe with two (numeric) columns added, labelled “lex_noskew_VERSUS_lex_skew” and “lex_noskew_VERSUS_mixed”. Wherever you would normally use “condition” in a formula in an lme, it can be replaced by (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew), e.g. ~ (a * condition) becomes ~ (a * (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew)).

# Replace a three-level factor by two centered dummy (contrast) variables.
#
# Args:
#   d:         a data frame.
#   condition: the factor (or a vector coercible to one), one value per row
#              of `d`; after dropping unused levels it must have 3 levels.
#   baselevel: name of the level used as the baseline of both contrasts.
#
# Returns:
#   `d` with two numeric columns added, named "<baselevel>_VERSUS_<level>",
#   one per non-baseline level. Each holds that level's contrast column
#   minus the column mean. Rows with a missing condition get NA.
lizContrasts= function(d, condition, baselevel) 
{
    # factor() drops unused levels; relevel() makes `baselevel` the first level
    condition <- relevel(factor(condition), baselevel)

    # The positional coding below is only valid for exactly three levels;
    # any other count would silently produce wrong or all-NA columns.
    if (nlevels(condition) != 3L) {
        stop("lizContrasts() needs a factor with exactly 3 levels, got ",
             nlevels(condition), call. = FALSE)
    }
    if (length(condition) != nrow(d)) {
        stop("'condition' must have one value per row of 'd'", call. = FALSE)
    }

    # Center each contrast column on its own mean (explicitly column-wise)
    contrast_matrix <- contrasts(condition)
    centered <- sweep(contrast_matrix, 2, colMeans(contrast_matrix))

    # Look the codes up by level index instead of going through temporary
    # dummy1/dummy2 columns, so columns of `d` with those names are untouched.
    level_index <- as.integer(condition)
    for (j in 1:2) {
        new_name <- paste(baselevel, rownames(centered)[j + 1], sep = "_VERSUS_")
        d[new_name] <- unname(centered[level_index, j])
    }

    return(d)
}

lizContrasts4

This function is a version of the previous lizContrasts function but to be used to create three centered dummy variables which stand in place of a four way factor (condition).

# Replace a four-level factor by three centered dummy (contrast) variables.
#
# Args:
#   d:         a data frame.
#   condition: the factor (or a vector coercible to one), one value per row
#              of `d`; after dropping unused levels it must have 4 levels.
#   baselevel: name of the level used as the baseline of all contrasts.
#
# Returns:
#   `d` with three numeric columns added, named "<baselevel>_VERSUS_<level>",
#   one per non-baseline level. Each holds that level's contrast column
#   minus the column mean. Rows with a missing condition get NA.
lizContrasts4= function(d, condition, baselevel) 
{
    # factor() drops unused levels; relevel() makes `baselevel` the first level
    condition <- relevel(factor(condition), baselevel)

    # The positional coding below is only valid for exactly four levels;
    # any other count would silently produce wrong or all-NA columns.
    if (nlevels(condition) != 4L) {
        stop("lizContrasts4() needs a factor with exactly 4 levels, got ",
             nlevels(condition), call. = FALSE)
    }
    if (length(condition) != nrow(d)) {
        stop("'condition' must have one value per row of 'd'", call. = FALSE)
    }

    # Center each contrast column on its own mean (explicitly column-wise)
    contrast_matrix <- contrasts(condition)
    centered <- sweep(contrast_matrix, 2, colMeans(contrast_matrix))

    # Look the codes up by level index instead of going through temporary
    # dummy1..dummy3 columns, so columns of `d` with those names are untouched.
    level_index <- as.integer(condition)
    for (j in 1:3) {
        new_name <- paste(baselevel, rownames(centered)[j + 1], sep = "_VERSUS_")
        d[new_name] <- unname(centered[level_index, j])
    }

    return(d)
}

Bf

This function is equivalent to the Dienes (2008) calculator which can be found here http://www.lifesci.sussex.ac.uk/home/Zoltan_Dienes/inference/Bayes.htm.

The code was provided by Baguley & Kaye (2010) and can be found here: http://www.academia.edu/427288/Review_of_Understanding_psychology_as_a_science_An_introduction_to_scientific_and_statistical_inference

# Bayes factor for an obtained effect under H1 versus a point null at 0.
#
# Args:
#   sd:           standard error of the obtained effect.
#   obtained:     the obtained effect (e.g. a mean difference).
#   uniform:      1 to model H1 with a uniform prior on [lower, upper];
#                 any other value uses a normal prior.
#   lower, upper: limits of the uniform prior (used only when uniform is 1).
#   meanoftheory, sdtheory: mean and sd of the normal prior.
#   tail:         1 turns the normal prior into a one-tailed prior (density
#                 set to 0 for theta <= 0 and doubled above); otherwise the
#                 prior is two-tailed.
#
# Returns:
#   A list with LikelihoodTheory (likelihood of the data under H1, obtained
#   by summing over a grid of 2001 theta values), Likelihoodnull and
#   BayesFactor (= LikelihoodTheory / Likelihoodnull).
Bf<-function(sd, obtained, uniform, lower=0, upper=1, meanoftheory=0,sdtheory=1, tail=2){
  # Compare by value, not with identical(): identical(1L, 1) is FALSE, so an
  # integer or logical flag would otherwise silently select the wrong prior.
  use_uniform <- isTRUE(uniform == 1)
  one_tailed  <- isTRUE(tail == 1)

  # The grid is walked by repeated addition, exactly as in the original
  # calculator, so that results (including the grid point at theta = 0 for
  # one-tailed priors) stay numerically identical.
  area <- 0
  if (use_uniform) {
    theta <- lower
    width <- upper - lower
    incr <- width / 2000
    prior_density <- 1 / width
    for (step in -1000:1000) {
      theta <- theta + incr
      height <- prior_density * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  } else {
    # start 5 prior SDs below the prior mean and move in steps of sdtheory/200
    theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (step in -1000:1000) {
      theta <- theta + incr
      prior_density <- dnorm(theta, meanoftheory, sdtheory)
      if (one_tailed) {
        prior_density <- if (theta <= 0) 0 else prior_density * 2
      }
      height <- prior_density * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  }

  LikelihoodTheory <- area
  Likelihoodnull <- dnorm(obtained, 0, sd)
  BayesFactor <- LikelihoodTheory / Likelihoodnull
  list("LikelihoodTheory" = LikelihoodTheory,
       "Likelihoodnull" = Likelihoodnull,
       "BayesFactor" = BayesFactor)
}

Bf_powercalc

This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, and a value for the predicted mean, which is set to be sdtheory (with meanoftheory=0)), plus the current number of participants N. However, rather than returning the BF for the current sample, it works out what the BF would be for a range of different subject numbers (assuming that the SE scales with 1/sqrt(N)).

# Bayes factor as a function of sample size.
#
# Takes the same arguments as Bf() plus:
#   N:        the number of participants the standard error `sd` is based on.
#   min, max: the range of hypothetical sample sizes to evaluate (min:max).
#
# For every hypothetical sample size newN the standard error is rescaled to
# sd * sqrt(N / newN) and handed to Bf().
#
# Returns:
#   A numeric matrix with one row per sample size and the columns
#   x (sample size) and y (Bayes factor). It is a matrix even when only a
#   single sample size is requested.
#
# Original author's note: the difference between N and df depends on the
# contrast (2 for a contrast between two groups, 3 for four groups, etc.);
# it is not used in the calculation below.
Bf_powercalc<-function(sd, obtained, uniform, lower=0, upper=1, meanoftheory=0, sdtheory=1, tail=2, N, min, max)
{
  candidate_n <- min:max

  # One Bayes factor per candidate sample size; the third element of the
  # list returned by Bf() is the Bayes factor itself.
  bayes_factors <- vapply(candidate_n, function(newN) {
    as.numeric(Bf(sd = sd * sqrt(N / newN), obtained, uniform, lower, upper,
                  meanoftheory, sdtheory, tail)[3])
  }, numeric(1))

  # Built in one step (no row-dropping), so a single-row result stays a matrix
  cbind(x = as.numeric(candidate_n), y = bayes_factors)
}

Load dataset

The data file contains data for both input nouns and minimal exposure nouns. Analyses were conducted with non-alternating nouns only, and therefore productions with alternating nouns have been removed from this data set. The data has also been filtered to remove trials not meeting the baseline criteria described in the paper (baseline criteria: trials were excluded if children had initially used an incorrect noun and been corrected by the experimenter, if they inserted an alternative word for a particle, or if they failed to include a particle. Children were not penalized for omitting to repeat the initial “moop”.)

# alldata = read.csv("skewdata_Aug2016.csv")

# Load new version of dataset that includes age and listening span
alldata = read.csv("skewdata_Dec2016.csv")

Experiment 1 (children), participant group information (Table 1)

get means for each participant

aggregated.child.age.listsp  = aggregate(cbind(age_months, listeningspanRAW, listeningspan) ~ participantid + condition, subset(alldata, agegroup == "child"), FUN=mean)

get values for Table 1

kable(summarySE(aggregated.child.age.listsp,  measurevar="age_months", groupvars = "condition"), digits = 2)
condition N age_months sd se ci
gen_noskew 15 74.27 4.10 1.06 2.27
lex_noskew 15 73.60 5.05 1.30 2.80
lex_skew 15 75.07 4.15 1.07 2.30
mixed 15 72.47 5.66 1.46 3.13
kable(summarySE(aggregated.child.age.listsp,  measurevar="listeningspan", groupvars = "condition"), digits = 2)
condition N listeningspan sd se ci
gen_noskew 15 107.27 20.78 5.37 11.51
lex_noskew 15 106.80 20.00 5.16 11.08
lex_skew 15 108.93 10.00 2.58 5.54
mixed 15 102.33 15.06 3.89 8.34

Since our contrasts compare the lexicalist-unskewed condition against each of the other conditions, use t-tests to compare the mean age and listening span of this group against those of each of the other groups - no significant differences were found (note: for listening span, we use raw scores in statistical analyses)

# Age
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "lex_skew")$age_months)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "lex_skew")$age_months
## t = -0.86875, df = 26.975, p-value = 0.3926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.930801  1.997468
## sample estimates:
## mean of x mean of y 
##  73.60000  75.06667
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "mixed")$age_months)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "mixed")$age_months
## t = 0.57873, df = 27.654, p-value = 0.5675
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.880318  5.146984
## sample estimates:
## mean of x mean of y 
##  73.60000  72.46667
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "gen_noskew")$age_months)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "gen_noskew")$age_months
## t = -0.39688, df = 26.849, p-value = 0.6946
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.114151  2.780818
## sample estimates:
## mean of x mean of y 
##  73.60000  74.26667
# Listening Span
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "lex_skew")$listeningspanRAW)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "lex_skew")$listeningspanRAW
## t = -1.2292, df = 19.395, p-value = 0.2337
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.8804211  0.7470878
## sample estimates:
## mean of x mean of y 
##  6.133333  7.200000
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "mixed")$listeningspanRAW)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "mixed")$listeningspanRAW
## t = 0.73102, df = 26.394, p-value = 0.4712
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.327215  2.793882
## sample estimates:
## mean of x mean of y 
##  6.133333  5.400000
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "gen_noskew")$listeningspanRAW)
## 
##  Welch Two Sample t-test
## 
## data:  subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "gen_noskew")$listeningspanRAW
## t = -0.42477, df = 26.879, p-value = 0.6744
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.110134  2.043467
## sample estimates:
## mean of x mean of y 
##  6.133333  6.666667

Experiment 1 (children), input nouns

Select appropriate data

exp1.child.trained = subset(alldata, agegroup == "child" & old_me == "old")

Check proportion of missing data

For the lexical-skew and lexical-noskew conditions, the number of possible trials contributing to the analysis is 15 children * 4 nouns * 4 productions per noun making 240 trials; for the mixed condition it’s 15 children * 2 nouns * 4 productions per noun making 120 trials. There are no trials for the generalist condition. The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.child.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                  0.05       0.03       0.02

Get the means

Means for table 3

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
lex_noskew 15 0.74 0.16 0.04 0.09
lex_skew 15 0.96 0.06 0.02 0.03
mixed 15 0.70 0.26 0.07 0.14

Separating out the nouns with the majority/ minority particle in the lexicalist-skewed condition (note: majority = 1, minority = 0).

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det N correct sd se ci
0 15 0.90 0.23 0.06 0.13
1 15 0.99 0.03 0.01 0.02

Statistical analyses

Contrast between conditions 1

lme analysis all data

Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.child.trained = lizContrasts(exp1.child.trained, exp1.child.trained$condition, "lex_noskew") 
exp1.child.trained = lizCenter(exp1.child.trained, list("correct_det"))

run the lmer model:

child.trained.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control = glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)

kable(summary(child.trained.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.413 0.328 7.350 0.000
lex_noskew_VERSUS_mixed -0.050 0.579 -0.087 0.931
lex_noskew_VERSUS_lex_skew 3.114 0.741 4.204 0.000
correct_det.ct -0.348 0.707 -0.492 0.622
lex_noskew_VERSUS_mixed:correct_det.ct 0.124 1.327 0.093 0.926
lex_noskew_VERSUS_lex_skew:correct_det.ct 0.705 1.601 0.440 0.660

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_child_trained_MEANDIFF
## [1] 0.05044514
lex_noskew_VERSUS_mixed_child_trained_SE = abs(summary(child.trained.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_child_trained_SE 
## [1] 0.5792347
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF
## [1] 3.114291
lex_noskew_VERSUS_lex_skew_child_trained_SE = abs(summary(child.trained.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_child_trained_SE
## [1] 0.7407486

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=lex_noskew_VERSUS_mixed_child_trained_SE , obtained=lex_noskew_VERSUS_mixed_child_trained_MEANDIFF  ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1525377
## 
## $Likelihoodnull
## [1] 0.6861334
## 
## $BayesFactor
## [1] 0.222315
Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.154154
## 
## $Likelihoodnull
## [1] 7.816447e-05
## 
## $BayesFactor
## [1] 1972.174

Comparing accuracy with majority and minority particle nouns within skewed language

lme analysis: skewed data

Select the subset of data from the skewed language and center (i) the variable of interest (majority_det – which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable

exp1.child.trained.skew = subset(exp1.child.trained, condition == "lex_skew")
exp1.child.trained.skew = lizCenter(exp1.child.trained.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants. The model with the interaction with the control variable and full random slopes structure didn’t converge and was systematically simplified.

Non-converging models:

child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct + correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

Converging model:

child.trained.skew.lmer = glmer(correct ~ (majority_det.ct + correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

kable(summary(child.trained.skew.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 5.610 1.475 3.803 0.000
majority_det.ct -1.855 3.666 -0.506 0.613
correct_det.ct -2.525 2.299 -1.098 0.272

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

majority_VERSUS_minority_child_trained_MEANDIFF = abs(summary(child.trained.skew.lmer)$coefficients[2])
majority_VERSUS_minority_child_trained_MEANDIFF 
## [1] 1.855478
majority_VERSUS_minority_child_trained_SE = abs(summary(child.trained.skew.lmer)$coefficients[5])
majority_VERSUS_minority_child_trained_SE 
## [1] 3.665624

We now use these values in the BF calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott, 2011)

Bf(sd=majority_VERSUS_minority_child_trained_SE , obtained=majority_VERSUS_minority_child_trained_MEANDIFF ,  uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1023541
## 
## $Likelihoodnull
## [1] 0.09574681
## 
## $BayesFactor
## [1] 1.069008

Contrast between conditions 2

Remove two of the three majority particle nouns (specifically, those coded as 3 and 4) from the lex_skew condition. This will leave just two nouns - coded 1 and 2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.

exp1.child.trained$remove = 0
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun3" ]=1
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun4" ]=1
exp1.child.trained.balanced = subset(exp1.child.trained, remove == 0)

View new mean for lex_skew

kable(summarySE(aggregate(correct ~ participantid + condition, FUN = mean, data = subset(exp1.child.trained.balanced, condition=="lex_skew")), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
lex_skew 15 0.95 0.12 0.03 0.07

lme analysis - balanced data set

For lme analysis, set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function

exp1.child.trained.balanced = lizContrasts(exp1.child.trained.balanced, exp1.child.trained.balanced$condition, "lex_noskew") 
exp1.child.trained.balanced = lizCenter(exp1.child.trained.balanced, list("correct_det"))

Run the lmer model:

child.trained.lmer.balanced = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.balanced)

kable(summary(child.trained.lmer.balanced)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.394 0.368 6.512 0.000
lex_noskew_VERSUS_mixed -0.033 0.626 -0.053 0.957
lex_noskew_VERSUS_lex_skew 3.004 0.838 3.586 0.000
correct_det.ct 0.092 0.773 0.119 0.906
lex_noskew_VERSUS_mixed:correct_det.ct 0.181 1.389 0.130 0.896
lex_noskew_VERSUS_lex_skew:correct_det.ct 1.714 1.759 0.975 0.330

BF analyses corresponding to lme analysis with balanced data set

For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error and this is taken from the model above.

lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF = abs(summary(child.trained.lmer.balanced)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF
## [1] 3.004239
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE = abs(summary(child.trained.lmer.balanced)$coefficients[9])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE
## [1] 0.8378305

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)

Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1607544
## 
## $Likelihoodnull
## [1] 0.0007687465
## 
## $BayesFactor
## [1] 209.1123

Experiment 1 (children), minimal exposure nouns

Select appropriate data

exp1.child.me = subset(alldata, agegroup == "child" & old_me == "m.e.")

Check proportion of missing data

In each condition, the number of possible trials contributing to the analysis is 15 children * 2 nouns * 4 productions per noun making 120 trials.

The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.child.me$condition)/c(15*2*4,15*2*4,15*2*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##       0.05       0.05       0.06       0.05

Get the means

Means for Table 1

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
gen_noskew 15 0.58 0.27 0.07 0.15
lex_noskew 15 0.64 0.30 0.08 0.17
lex_skew 15 0.86 0.22 0.06 0.12
mixed 15 0.58 0.17 0.04 0.10

Separating out the nouns with the majority/ minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det N correct sd se ci
0 15 0.77 0.42 0.11 0.23
1 15 0.92 0.22 0.06 0.12

Statistical analyses

Contrast between conditions

lme analysis

Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts4 helper function, and center the control variable using the lizCenter function.

exp1.child.me = lizContrasts4(exp1.child.me, exp1.child.me$condition, "lex_noskew") 
exp1.child.me = lizCenter(exp1.child.me, list("correct_det"))

run the lmer model:

child.me.lmer = glmer(correct ~  (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)

kable(summary(child.me.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.637 0.355 4.607 0.000
lex_noskew_VERSUS_mixed -0.488 0.821 -0.595 0.552
lex_noskew_VERSUS_lex_skew 2.664 0.926 2.876 0.004
lex_noskew_VERSUS_gen_noskew -0.655 0.821 -0.798 0.425
correct_det.ct -0.364 0.792 -0.460 0.646
lex_noskew_VERSUS_mixed:correct_det.ct 0.379 1.927 0.197 0.844
lex_noskew_VERSUS_lex_skew:correct_det.ct -0.537 2.102 -0.256 0.798
lex_noskew_VERSUS_gen_noskew:correct_det.ct -0.790 1.933 -0.409 0.683

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_child_me_MEANDIFF
## [1] 0.4882859
lex_noskew_VERSUS_mixed_child_me_SE = abs(summary(child.me.lmer)$coefficients[10])
lex_noskew_VERSUS_mixed_child_me_SE 
## [1] 0.8213223
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF
## [1] 2.663741
lex_noskew_VERSUS_lex_skew_child_me_SE = abs(summary(child.me.lmer)$coefficients[11])
lex_noskew_VERSUS_lex_skew_child_me_SE
## [1] 0.9260369
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[4])
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF
## [1] 0.6548345
lex_noskew_VERSUS_gen_noskew_child_me_SE = abs(summary(child.me.lmer)$coefficients[12])
lex_noskew_VERSUS_gen_noskew_child_me_SE
## [1] 0.8208018

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=lex_noskew_VERSUS_mixed_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1964035
## 
## $Likelihoodnull
## [1] 0.4070492
## 
## $BayesFactor
## [1] 0.4825055
Bf(sd=lex_noskew_VERSUS_lex_skew_child_me_SE, obtained=lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1797892
## 
## $Likelihoodnull
## [1] 0.006879501
## 
## $BayesFactor
## [1] 26.13405
Bf(sd=lex_noskew_VERSUS_gen_noskew_child_me_SE, obtained=lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2108706
## 
## $Likelihoodnull
## [1] 0.3535585
## 
## $BayesFactor
## [1] 0.5964233
BF power calculation for minimal exposure lexical_unskewed versus mixed contrast

We found no substantial preference for either H1 over H0, or H0 over H1. We now consider how many more participants we might need in order to have evidence in each direction.

First, assume the ACTUAL mean difference is ZERO: with this amount of variance, approximately how many participants would we need to have substantial evidence for the NULL (given that the standard error scales with the number of participants)?

x=Bf_powercalc(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=0  ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600) 
plot(x)
abline(h=(1/3))

x[12:14,]                 
##       x         y
## [1,] 21 0.3373585
## [2,] 22 0.3304887
## [3,] 23 0.3240245

suggests we need an N of 22 (note that this is LESS than the current N of 30)

Second, assume the ACTUAL mean difference is the current estimate: with this amount of variance, approximately how many participants would we need to have substantial evidence for H1 (given that the standard error scales with the number of participants)?

x=Bf_powercalc(sd=lex_noskew_VERSUS_mixed_child_me_SE, obtained=lex_noskew_VERSUS_mixed_child_me_MEANDIFF ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600)  
plot(x)
abline(h=(3))

x[511:513,]                 
##        x        y
## [1,] 520 2.987672
## [2,] 521 3.002579
## [3,] 522 3.017565

suggests we need an N of 521 (i.e., we would need 261 per condition)

Comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

Select the subset of data from the skewed language and center (i) the variable of interest (majority_det – which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable

exp1.child.me.skew = subset(exp1.child.me, condition == "lex_skew")
exp1.child.me.skew= lizCenter(exp1.child.me.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants. The model with the interaction with the control variable and full random slopes structure didn’t converge and was systematically simplified.

Non-converging models:

child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct + correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

Converging model:

child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

kable(summary(child.me.skew.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 9.636 2.443 3.944 0.000
majority_det.ct -2.186 4.718 -0.463 0.643
correct_det.ct -0.712 3.290 -0.216 0.829
majority_det.ct:correct_det.ct 1.650 6.353 0.260 0.795

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

# Absolute estimate and standard error for the majority_det.ct effect, looked
# up by row/column name rather than by linear position in the coefficient
# matrix, so the lookup cannot silently land on another cell if the model's
# fixed effects are added to or reordered.
# NOTE(review): the estimate is negative in the coefficient table above
# (-2.186) and abs() discards that sign before the one-tailed (tail=1) Bf call
# below - confirm the effect is in the predicted direction.
majority_VERSUS_minority_child_me_MEANDIFF = abs(summary(child.me.skew.lmer)$coefficients["majority_det.ct", "Estimate"])
majority_VERSUS_minority_child_me_MEANDIFF
## [1] 2.18568
majority_VERSUS_minority_child_me_SE = abs(summary(child.me.skew.lmer)$coefficients["majority_det.ct", "Std. Error"])
majority_VERSUS_minority_child_me_SE
## [1] 4.718476

We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=majority_VERSUS_minority_child_me_SE , obtained=majority_VERSUS_minority_child_me_MEANDIFF ,  uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.08078344
## 
## $Likelihoodnull
## [1] 0.07594776
## 
## $BayesFactor
## [1] 1.063671

Experiment 1 (children), reported awareness of lexicality

Means for Table 4 (awareness of lexicality)

trained nouns (aware = 1, unaware = 0)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition lexicallyaware N correct sd se ci
lex_noskew 0 12 0.73 0.16 0.04 0.10
lex_noskew 1 3 0.75 0.22 0.12 0.54
lex_skew 0 7 0.99 0.03 0.01 0.02
lex_skew 1 8 0.94 0.07 0.03 0.06
mixed 0 11 0.73 0.28 0.08 0.19
mixed 1 4 0.63 0.20 0.10 0.32

minimal exposure nouns

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition lexicallyaware N correct sd se ci
gen_noskew 0 11 0.62 0.25 0.07 0.17
gen_noskew 1 4 0.46 0.34 0.17 0.55
lex_noskew 0 12 0.63 0.29 0.09 0.19
lex_noskew 1 3 0.67 0.38 0.22 0.95
lex_skew 0 7 0.82 0.24 0.09 0.22
lex_skew 1 8 0.89 0.21 0.07 0.17
mixed 0 11 0.61 0.19 0.06 0.13
mixed 1 4 0.48 0.04 0.02 0.06

Experiment 2 (adults), trained nouns

Select appropriate data

exp1.adult.trained = subset(alldata, agegroup == "adult" & old_me == "old")

check proportion of missing data

For the lexical-skew and lexical-noskew conditions, the number of possible trials contributing to the analysis is 15 adults * 4 nouns * 4 productions per noun making 240 trials; for the mixed condition it’s 15 adults * 2 nouns * 4 productions per noun making 120 trials. There was no generalist condition for adults. The proportion of missing trials in each condition is therefore:

# Proportion of missing trials per condition: 1 - observed / possible trials.
# The denominators are listed in the order of the condition levels printed
# below (gen_noskew, lex_noskew, lex_skew, mixed); the leading 0 is a
# placeholder for gen_noskew, which the text above says was not run with adults.
1-(round(table(exp1.adult.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                     0          0          0

Get the means

Means for Table 3

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
lex_noskew 15 0.78 0.24 0.06 0.13
lex_skew 15 0.93 0.14 0.04 0.08
mixed 15 0.61 0.17 0.04 0.09

Separating out the nouns with the majority/ minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det N correct sd se ci
0 15 0.88 0.27 0.07 0.15
1 15 0.94 0.12 0.03 0.06

Statistical analyses

Contrast between conditions 1

lme analysis all data

Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.adult.trained = lizContrasts(exp1.adult.trained, exp1.adult.trained$condition, "lex_noskew") 
exp1.adult.trained = lizCenter(exp1.adult.trained, list("correct_det"))

run the lmer model:

adult.trained.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained)

kable(summary(adult.trained.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.996 0.304 6.557 0.000
lex_noskew_VERSUS_mixed -1.296 0.617 -2.099 0.036
lex_noskew_VERSUS_lex_skew 1.759 0.691 2.544 0.011
correct_det.ct -0.220 0.406 -0.541 0.589
lex_noskew_VERSUS_mixed:correct_det.ct -0.447 0.625 -0.715 0.475
lex_noskew_VERSUS_lex_skew:correct_det.ct -0.948 0.787 -1.205 0.228

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

# Absolute estimate and standard error for each between-condition contrast,
# looked up by row/column name rather than by linear position in the
# coefficient matrix, so the lookup cannot silently land on another cell if
# the model's fixed effects are added to or reordered.
lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Estimate"])
lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF
## [1] 1.2955
lex_noskew_VERSUS_mixed_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Std. Error"])
lex_noskew_VERSUS_mixed_adult_trained_SE
## [1] 0.6170808
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF
## [1] 1.759346
lex_noskew_VERSUS_lex_skew_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_adult_trained_SE
## [1] 0.6914349

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)

Bf(sd=lex_noskew_VERSUS_mixed_adult_trained_SE , obtained=lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF  ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2491816
## 
## $Likelihoodnull
## [1] 0.07136657
## 
## $BayesFactor
## [1] 3.491573
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2302123
## 
## $Likelihoodnull
## [1] 0.02265997
## 
## $BayesFactor
## [1] 10.15943

Comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

Select the subset of data from the skewed language and center (i) the variable of interest (majority_det - which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable

exp1.adult.trained.skew = subset(exp1.adult.trained, condition == "lex_skew")
exp1.adult.trained.skew= lizCenter(exp1.adult.trained.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.

adult.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.skew)

kable(summary(adult.trained.skew.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.231 2.209 2.821 0.005
majority_det.ct 2.426 3.394 0.715 0.475
correct_det.ct 4.205 4.347 0.967 0.333
majority_det.ct:correct_det.ct 3.940 7.540 0.523 0.601

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

# Absolute estimate and standard error for the majority_det.ct effect, looked
# up by row/column name rather than by linear position in the coefficient
# matrix, so the lookup cannot silently land on another cell if the model's
# fixed effects are added to or reordered.
majority_VERSUS_minority_adult_trained_MEANDIFF = abs(summary(adult.trained.skew.lmer)$coefficients["majority_det.ct", "Estimate"])
majority_VERSUS_minority_adult_trained_MEANDIFF
## [1] 2.426358
majority_VERSUS_minority_adult_trained_SE = abs(summary(adult.trained.skew.lmer)$coefficients["majority_det.ct", "Std. Error"])
majority_VERSUS_minority_adult_trained_SE
## [1] 3.39417

We now use these values in the Bf calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=majority_VERSUS_minority_adult_trained_SE , obtained=majority_VERSUS_minority_adult_trained_MEANDIFF ,  uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1071971
## 
## $Likelihoodnull
## [1] 0.09103505
## 
## $BayesFactor
## [1] 1.177537

Contrast between conditions 2

Remove two of the three majority particle nouns (specifically, those coded as 3 and 4) from the lex_skew condition. This will leave just two nouns - coded 1 and 2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.

# Build the balanced data set: flag every row as kept (0) by default, then mark
# the lex_skew rows for trainednoun3 and trainednoun4 for removal (1).
exp1.adult.trained$remove = 0
exp1.adult.trained$remove[exp1.adult.trained$condition == "lex_skew" & exp1.adult.trained$noun == "trainednoun3" ]=1
exp1.adult.trained$remove[exp1.adult.trained$condition == "lex_skew" & exp1.adult.trained$noun == "trainednoun4" ]=1
# Keep only the unflagged rows; the remove column itself stays on
# exp1.adult.trained.
exp1.adult.trained.balanced = subset(exp1.adult.trained, remove == 0)

View new mean for lex_skew

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = subset(exp1.adult.trained.balanced, condition=="lex_skew")), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
lex_skew 15 0.94 0.13 0.03 0.07

lme analysis - balanced data set

Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function

exp1.adult.trained.balanced = lizContrasts(exp1.adult.trained.balanced, exp1.adult.trained.balanced$condition, "lex_noskew") 
exp1.adult.trained.balanced = lizCenter(exp1.adult.trained.balanced, list("correct_det"))

Run the lmer model:

adult.trained.lmer.balanced = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.balanced)

kable(summary(adult.trained.lmer.balanced)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.086 0.331 6.294 0.000
lex_noskew_VERSUS_mixed -1.241 0.565 -2.196 0.028
lex_noskew_VERSUS_lex_skew 2.149 0.808 2.660 0.008
correct_det.ct -0.665 0.529 -1.258 0.209
lex_noskew_VERSUS_mixed:correct_det.ct -0.322 0.660 -0.487 0.626
lex_noskew_VERSUS_lex_skew:correct_det.ct -1.955 1.262 -1.549 0.121

BF analyses corresponding to lme analysis with balanced data set

For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error and this is taken from the model above

# Absolute estimate and standard error for the lex_noskew versus lex_skew
# contrast in the balanced model, looked up by row/column name rather than by
# linear position in the coefficient matrix, so the lookup cannot silently
# land on another cell if the model's fixed effects are added to or reordered.
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF = abs(summary(adult.trained.lmer.balanced)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF
## [1] 2.148546
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE = abs(summary(adult.trained.lmer.balanced)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE
## [1] 0.8076812

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)

Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2088627
## 
## $Likelihoodnull
## [1] 0.01435687
## 
## $BayesFactor
## [1] 14.54793

Experiment 2 (adults), minimal exposure nouns

Select appropriate data

exp1.adult.me = subset(alldata, agegroup == "adult" & old_me == "m.e.")

Check proportion of missing data

In each condition, the number of possible trials contributing to the analysis is 15 adults * 2 nouns * 4 productions per noun making 120 trials. There is no generalist condition for adults. The proportion of missing trials in each condition is therefore:

# Proportion of missing trials per condition: 1 - observed / possible trials.
# The denominators are listed in the order of the condition levels printed
# below (gen_noskew, lex_noskew, lex_skew, mixed); the leading 0 is a
# placeholder for gen_noskew, which the text above says was not run with adults.
1-(round(table(exp1.adult.me$condition)/c(0,15*2*4,15*2*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                  0.00       0.00       0.01

Get the means

Means for Table 1

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition N correct sd se ci
lex_noskew 15 0.89 0.27 0.07 0.15
lex_skew 15 0.97 0.13 0.03 0.07
mixed 15 0.84 0.26 0.07 0.14

Separating out the nouns with the majority/ minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det N correct sd se ci
0 15 1.00 0.00 0.00 0.00
1 15 0.93 0.26 0.07 0.14

Statistical analyses

Contrast between conditions

lme analysis

Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.adult.me = lizContrasts(exp1.adult.me, exp1.adult.me$condition, "lex_noskew") 
exp1.adult.me = lizCenter(exp1.adult.me, list("correct_det"))

run the lmer model:

adult.me.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me)

kable(summary(adult.me.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 12.541 13.922 0.901 0.368
lex_noskew_VERSUS_mixed -0.704 1.991 -0.354 0.724
lex_noskew_VERSUS_lex_skew 10.920 41.580 0.263 0.793
correct_det.ct 6.948 27.856 0.249 0.803
lex_noskew_VERSUS_mixed:correct_det.ct -0.150 3.523 -0.043 0.966
lex_noskew_VERSUS_lex_skew:correct_det.ct 20.475 83.273 0.246 0.806

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

# Absolute estimate and standard error for each between-condition contrast,
# looked up by row/column name rather than by linear position in the
# coefficient matrix, so the lookup cannot silently land on another cell if
# the model's fixed effects are added to or reordered.
lex_noskew_VERSUS_mixed_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Estimate"])
lex_noskew_VERSUS_mixed_adult_me_MEANDIFF
## [1] 0.7040022
lex_noskew_VERSUS_mixed_adult_me_SE = abs(summary(adult.me.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Std. Error"])
lex_noskew_VERSUS_mixed_adult_me_SE
## [1] 1.991199
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF
## [1] 10.92037
lex_noskew_VERSUS_lex_skew_adult_me_SE = abs(summary(adult.me.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_adult_me_SE
## [1] 41.58015

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=lex_noskew_VERSUS_mixed_adult_me_SE , obtained=lex_noskew_VERSUS_mixed_adult_me_MEANDIFF  ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1410783
## 
## $Likelihoodnull
## [1] 0.1882138
## 
## $BayesFactor
## [1] 0.7495643
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_me_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF ,  uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.009397112
## 
## $Likelihoodnull
## [1] 0.009269279
## 
## $BayesFactor
## [1] 1.013791

comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

Select the subset of data from the skewed language and center (i) the variable of interest (majority_det – which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable

exp1.adult.me.skew = subset(exp1.adult.me, condition == "lex_skew")
exp1.adult.me.skew = lizCenter(exp1.adult.me.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.

adult.me.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me.skew)

kable(summary(adult.me.skew.lmer)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 26.311 3172748 0 1
majority_det.ct -0.829 6345497 0 1
correct_det.ct 13.807 6345497 0 1
majority_det.ct:correct_det.ct 16.789 12690993 0 1

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

# Absolute estimate and standard error for the majority_det.ct effect, looked
# up by row/column name rather than by linear position in the coefficient
# matrix, so the lookup cannot silently land on another cell if the model's
# fixed effects are added to or reordered.
# NOTE(review): the estimate is negative in the coefficient table above
# (-0.829) and abs() discards that sign before the one-tailed (tail=1) Bf call
# below - confirm the effect is in the predicted direction.
majority_VERSUS_minority_adult_me_MEANDIFF = abs(summary(adult.me.skew.lmer)$coefficients["majority_det.ct", "Estimate"])
majority_VERSUS_minority_adult_me_MEANDIFF
## [1] 0.8290701
majority_VERSUS_minority_adult_me_SE = abs(summary(adult.me.skew.lmer)$coefficients["majority_det.ct", "Std. Error"])
majority_VERSUS_minority_adult_me_SE
## [1] 6345497

We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott (2011))

Bf(sd=majority_VERSUS_minority_adult_me_SE , obtained=majority_VERSUS_minority_adult_me_MEANDIFF ,  uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 6.27447e-08
## 
## $Likelihoodnull
## [1] 6.287014e-08
## 
## $BayesFactor
## [1] 0.9980047

Experiment 2 (adults), reported awareness of lexicality

Means for table 5 (awareness of lexicality)

Trained nouns (aware = 1, unaware = 0)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition lexicallyaware N correct sd se ci
lex_noskew 1 15 0.78 0.24 0.06 0.13
lex_skew 0 1 1.00 NA NA NA
lex_skew 1 14 0.92 0.14 0.04 0.08
mixed 0 6 0.60 0.18 0.08 0.19
mixed 1 9 0.61 0.17 0.06 0.13

Minimal exposure nouns

Means for table 5 (awareness of lexicality)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition lexicallyaware N correct sd se ci
lex_noskew 1 15 0.89 0.27 0.07 0.15
lex_skew 0 1 1.00 NA NA NA
lex_skew 1 14 0.96 0.13 0.04 0.08
mixed 0 6 0.81 0.35 0.14 0.37
mixed 1 9 0.86 0.19 0.06 0.15

statistical comparison

Comparing the extent of reported awareness of lexical patterns in the different conditions using Fisher's exact test

awaretable = with(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me), 
                  table(condition, lexicallyaware))

kable(awaretable)
0 1
gen_noskew 0 0
lex_noskew 0 15
lex_skew 1 14
mixed 6 9
# mixed versus lexicalist-unskewed
fisher.test(rbind(awaretable[2,], awaretable[4,]))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  rbind(awaretable[2, ], awaretable[4, ])
## p-value = 0.01686
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.6740343
## sample estimates:
## odds ratio 
##          0
# mixed versus lexicalist-skewed
fisher.test(rbind(awaretable[3,], awaretable[4,]))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  rbind(awaretable[3, ], awaretable[4, ])
## p-value = 0.08008
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.002183889 1.190995284
## sample estimates:
## odds ratio 
##  0.1151766

Subsidiary analyses for children (ruling out confounds of age and listening span in skew advantage)

An anonymous reviewer pointed out that the children in the lexicalist skewed condition are somewhat older and have a higher listening span.

To explore this confound:

First, we looked for the following correlations:

  1. performance on input nouns and age
  2. performance on input nouns and listeningspan
  3. performance on minimal exposure nouns and age
  4. performance on minimal exposure and listeningspan

(note that we use non-parametric statistics given that our data are underlyingly binomial; however, results with Pearson are equivalent)

with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.trained, FUN=mean), Kendall(correct, age_months))
## tau = 0.0938, 2-sided pvalue =0.40557
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.trained, FUN=mean), Kendall(correct, listeningspanRAW))
## tau = 0.0655, 2-sided pvalue =0.57197
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.me, FUN=mean), Kendall(correct, age_months))
## tau = 0.088, 2-sided pvalue =0.37565
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.me, FUN=mean), Kendall(correct, listeningspanRAW))
## tau = 0.0914, 2-sided pvalue =0.36537

No significant correlations.

Second, we re-run the models for children above such that they include each of age and listening span

exp1.child.trained = lizCenter(exp1.child.trained, list("age_months", "listeningspanRAW"))

child.trained.lmer.age = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + age_months.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)
kable(summary(child.trained.lmer.age)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.419 0.329 7.348 0.000
lex_noskew_VERSUS_mixed -0.020 0.586 -0.035 0.972
lex_noskew_VERSUS_lex_skew 3.101 0.742 4.179 0.000
correct_det.ct -0.348 0.705 -0.494 0.622
age_months.ct 0.019 0.044 0.432 0.666
lex_noskew_VERSUS_mixed:correct_det.ct 0.134 1.323 0.101 0.919
lex_noskew_VERSUS_lex_skew:correct_det.ct 0.713 1.598 0.446 0.655
child.trained.lmer.listspan = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + listeningspanRAW.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)
kable(summary(child.trained.lmer.listspan)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.412 0.327 7.367 0.000
lex_noskew_VERSUS_mixed -0.168 0.573 -0.293 0.770
lex_noskew_VERSUS_lex_skew 3.295 0.750 4.395 0.000
correct_det.ct -0.283 0.717 -0.394 0.693
listeningspanRAW.ct -0.173 0.089 -1.948 0.051
lex_noskew_VERSUS_mixed:correct_det.ct 0.116 1.378 0.084 0.933
lex_noskew_VERSUS_lex_skew:correct_det.ct 0.878 1.638 0.536 0.592
exp1.child.me = lizCenter(exp1.child.me, list("age_months", "listeningspanRAW"))

child.me.lmer.age = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + age_months.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)
kable(summary(child.me.lmer.age)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.640 0.355 4.619 0.000
lex_noskew_VERSUS_mixed -0.448 0.826 -0.542 0.588
lex_noskew_VERSUS_lex_skew 2.650 0.925 2.867 0.004
lex_noskew_VERSUS_gen_noskew -0.651 0.819 -0.794 0.427
correct_det.ct -0.380 0.791 -0.480 0.631
age_months.ct 0.023 0.065 0.352 0.725
lex_noskew_VERSUS_mixed:correct_det.ct 0.411 1.925 0.213 0.831
lex_noskew_VERSUS_lex_skew:correct_det.ct -0.569 2.101 -0.271 0.786
lex_noskew_VERSUS_gen_noskew:correct_det.ct -0.775 1.930 -0.402 0.688
# Minimal exposure nouns with listening span (listeningspanRAW.ct) as the added
# covariate. Stored under its own name, matching child.trained.lmer.listspan,
# so that the age model held in child.me.lmer.age is not overwritten.
child.me.lmer.listspan = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + listeningspanRAW.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)
kable(summary(child.me.lmer.listspan)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.634 0.356 4.596 0.000
lex_noskew_VERSUS_mixed -0.517 0.826 -0.627 0.531
lex_noskew_VERSUS_lex_skew 2.737 0.942 2.905 0.004
lex_noskew_VERSUS_gen_noskew -0.613 0.826 -0.742 0.458
correct_det.ct -0.294 0.805 -0.366 0.715
listeningspanRAW.ct -0.060 0.114 -0.523 0.601
lex_noskew_VERSUS_mixed:correct_det.ct 0.344 1.940 0.177 0.859
lex_noskew_VERSUS_lex_skew:correct_det.ct -0.426 2.121 -0.201 0.841
lex_noskew_VERSUS_gen_noskew:correct_det.ct -0.823 1.945 -0.423 0.672

In line with the correlations, age and listening span are not significant predictors for either input nouns or minimal exposure nouns. Critically, we see the same pattern of results for key experimental effects (a reliable contrast between lexicalist-skewed and lexicalist-unskewed - no other reliable differences).

As a final check, we re-ran the model having removed the three oldest children from the skewed condition (making the lexicalist skewed and lexicalist unskewed means near identical).

Show that these are the three oldest children and that the means are balanced once they are removed:

aggregated.child.age.listsp  = aggregate(cbind(age_months, listeningspanRAW, listeningspan) ~ participantid + condition, subset(alldata, agegroup == "child"), FUN=mean)
aggregated.child.age.listsp
##          participantid  condition age_months listeningspanRAW
## 1   gen_noskew_child_1 gen_noskew         80                6
## 2  gen_noskew_child_10 gen_noskew         81               13
## 3  gen_noskew_child_11 gen_noskew         72                7
## 4  gen_noskew_child_12 gen_noskew         77                1
## 5  gen_noskew_child_13 gen_noskew         70                7
## 6  gen_noskew_child_14 gen_noskew         74                7
## 7  gen_noskew_child_15 gen_noskew         72                3
## 8   gen_noskew_child_2 gen_noskew         72                4
## 9   gen_noskew_child_3 gen_noskew         74                1
## 10  gen_noskew_child_4 gen_noskew         70               11
## 11  gen_noskew_child_5 gen_noskew         78               14
## 12  gen_noskew_child_6 gen_noskew         70                6
## 13  gen_noskew_child_7 gen_noskew         73                7
## 14  gen_noskew_child_8 gen_noskew         70                6
## 15  gen_noskew_child_9 gen_noskew         81                7
## 16  lex_noskew_child_1 lex_noskew         70                7
## 17 lex_noskew_child_10 lex_noskew         79                5
## 18 lex_noskew_child_11 lex_noskew         69               12
## 19 lex_noskew_child_12 lex_noskew         68                5
## 20 lex_noskew_child_13 lex_noskew         70               10
## 21 lex_noskew_child_14 lex_noskew         71                6
## 22 lex_noskew_child_15 lex_noskew         73                1
## 23  lex_noskew_child_2 lex_noskew         79                4
## 24  lex_noskew_child_3 lex_noskew         74                1
## 25  lex_noskew_child_4 lex_noskew         72                6
## 26  lex_noskew_child_5 lex_noskew         80                5
## 27  lex_noskew_child_6 lex_noskew         81               10
## 28  lex_noskew_child_7 lex_noskew         64                7
## 29  lex_noskew_child_8 lex_noskew         77                5
## 30  lex_noskew_child_9 lex_noskew         77                8
## 31    lex_skew_child_1   lex_skew         79                6
## 32   lex_skew_child_10   lex_skew         80                7
## 33   lex_skew_child_11   lex_skew         75                9
## 34   lex_skew_child_12   lex_skew         77                8
## 35   lex_skew_child_13   lex_skew         69                7
## 36   lex_skew_child_14   lex_skew         72                7
## 37   lex_skew_child_15   lex_skew         70                7
## 38    lex_skew_child_2   lex_skew         72               10
## 39    lex_skew_child_3   lex_skew         78                7
## 40    lex_skew_child_4   lex_skew         70                6
## 41    lex_skew_child_5   lex_skew         72                4
## 42    lex_skew_child_6   lex_skew         78                8
## 43    lex_skew_child_7   lex_skew         80                7
## 44    lex_skew_child_8   lex_skew         81                8
## 45    lex_skew_child_9   lex_skew         73                7
## 46       mixed_child_1      mixed         62                5
## 47      mixed_child_10      mixed         69               10
## 48      mixed_child_11      mixed         72                0
## 49      mixed_child_12      mixed         72                3
## 50      mixed_child_13      mixed         67                4
## 51      mixed_child_14      mixed         72                6
## 52      mixed_child_15      mixed         64                3
## 53       mixed_child_2      mixed         79                8
## 54       mixed_child_3      mixed         77                6
## 55       mixed_child_4      mixed         74                5
## 56       mixed_child_5      mixed         72                6
## 57       mixed_child_6      mixed         78                5
## 58       mixed_child_7      mixed         81                6
## 59       mixed_child_8      mixed         79                8
## 60       mixed_child_9      mixed         69                6
##    listeningspan
## 1             99
## 2            133
## 3            104
## 4             75
## 5            125
## 6            104
## 7             85
## 8             89
## 9             75
## 10           136
## 11           138
## 12           119
## 13           104
## 14           119
## 15           104
## 16           125
## 17            94
## 18           136
## 19           113
## 20           136
## 21           119
## 22            75
## 23            89
## 24            75
## 25            99
## 26            94
## 27           119
## 28           125
## 29            94
## 30           109
## 31            99
## 32           104
## 33           114
## 34           109
## 35           125
## 36           104
## 37           125
## 38           119
## 39           104
## 40           119
## 41            89
## 42           109
## 43           101
## 44           109
## 45           104
## 46           113
## 47           136
## 48            70
## 49            85
## 50           108
## 51            99
## 52           102
## 53           109
## 54            99
## 55            94
## 56            99
## 57            94
## 58            99
## 59           109
## 60           119
# Participants dropped for the age-balanced re-analysis. A single shared vector
# guarantees that both data sets below are filtered by exactly the same IDs.
agebal.excluded.ids <- c("lex_skew_child_2", "lex_skew_child_6",
                         "lex_skew_child_3")

# NOTE(review): `%in%` never returns NA, so rows with a missing participantid
# are kept (a `!=` comparison would drop them inside subset()) -- confirm that
# participantid has no missing values.
exp1.child.trained.agebalenced <- subset(
  exp1.child.trained, !(participantid %in% agebal.excluded.ids)
)
exp1.child.me.agebalenced <- subset(
  exp1.child.me, !(participantid %in% agebal.excluded.ids)
)

# Mean age_months per condition after the exclusions.
# NOTE(review): the mean is taken over the rows of the data frame, so a
# participant contributing more rows weighs more -- confirm this is intended.
aggregate(age_months ~ condition, data = exp1.child.trained.agebalenced,
          FUN = mean)
##    condition age_months
## 1 lex_noskew   73.77632
## 2   lex_skew   74.97312
## 3      mixed   72.43220

Re-run the models:

# Refit the binomial mixed-effects model on exp1.child.trained.agebalenced:
# the two condition contrasts crossed with correct_det.ct, plus a
# by-participant random intercept and random slope for correct_det.ct.
child.trained.lmer.agebal <- glmer(
  correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) *
    correct_det.ct +
    (correct_det.ct | participantid),
  data = exp1.child.trained.agebalenced,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Fixed-effect coefficient table, rounded to three decimals.
kable(coef(summary(child.trained.lmer.agebal)), digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.388 0.345 6.927 0.000
lex_noskew_VERSUS_mixed -0.045 0.594 -0.077 0.939
lex_noskew_VERSUS_lex_skew 3.034 0.794 3.824 0.000
correct_det.ct -0.097 0.737 -0.131 0.895
lex_noskew_VERSUS_mixed:correct_det.ct 0.143 1.346 0.106 0.915
lex_noskew_VERSUS_lex_skew:correct_det.ct 1.344 1.704 0.789 0.430
# Refit the binomial mixed-effects model on exp1.child.me.agebalenced:
# the three condition contrasts crossed with correct_det.ct, plus a
# by-participant random intercept and random slope for correct_det.ct.
child.me.lmer.agebal <- glmer(
  correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew +
               lex_noskew_VERSUS_gen_noskew) *
    correct_det.ct +
    (correct_det.ct | participantid),
  data = exp1.child.me.agebalenced,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)

# Fixed-effect coefficient table, rounded to three decimals.
kable(coef(summary(child.me.lmer.agebal)), digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.714 0.366 4.678 0.000
lex_noskew_VERSUS_mixed -0.456 0.793 -0.575 0.565
lex_noskew_VERSUS_lex_skew 3.014 1.024 2.945 0.003
lex_noskew_VERSUS_gen_noskew -0.648 0.790 -0.820 0.412
correct_det.ct -0.477 0.854 -0.558 0.577
lex_noskew_VERSUS_mixed:correct_det.ct 0.371 2.012 0.185 0.854
lex_noskew_VERSUS_lex_skew:correct_det.ct -0.975 2.371 -0.411 0.681
lex_noskew_VERSUS_gen_noskew:correct_det.ct -0.822 2.015 -0.408 0.684

The contrast between the lexicalist skewed and lexicalist unskewed conditions remains reliable in both age-balanced models.