Load packages and helper functions

Packages.

rm(list=ls())
library(ggplot2)
library(plotrix)
#suppressPackageStartupMessages(library(dplyr))
library(plyr)
#library(bootstrap)
suppressPackageStartupMessages(library(lme4))
library(knitr)
theme_set(theme_bw())
opts_chunk$set(fig.width=8, fig.height=5, 
                      echo=TRUE, warning=FALSE, message=FALSE, cache=TRUE)

Helper functions.

summarySE

This function can be found on the website “Cookbook for R”.

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).

data: a data frame.

measurevar: the name of a column that contains the variable to be summarized.

groupvars: a vector containing names of columns that contain grouping variables.

na.rm: a boolean that indicates whether to ignore NAs.

conf.interval: the percent range of the confidence interval (default is 95%).

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    require(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column    
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval: 
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
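As a quick illustration (toy data only; the data frame toy and its columns group and score are made up for this sketch, not part of the analyses below):

toy = data.frame(group = rep(c("a", "b"), each = 10),
                 score = c(rnorm(10, mean = 5), rnorm(10, mean = 7)))
summarySE(toy, measurevar = "score", groupvars = "group")
# should return one row per group, with columns N, score (the group mean), sd, se and ci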

myCenter

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger's blog: https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.

From his blog:

-If the input is a numeric variable, the output is the centered variable.

-If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.

-If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).

myCenter = function(x) {
    if (is.numeric(x)) { return(x - mean(x, na.rm=T)) }
    if (is.factor(x)) {
        x = as.numeric(x)
        return(x - mean(x, na.rm=T))
    }
    if (is.data.frame(x) || is.matrix(x)) {
        m = matrix(nrow=nrow(x), ncol=ncol(x))
        colnames(m) = paste("c", colnames(x), sep="")
        for (i in 1:ncol(x)) {
            m[,i] = myCenter(x[,i])
        }
        return(as.data.frame(m))
    }
}
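A minimal illustration of the behaviour described above (toy values only):

myCenter(c(1, 2, 3))
# should return -1 0 1
f = factor(c("low", "high", "low", "high"))
myCenter(f)
# levels are taken in alphabetical order (high = 1, low = 2),
# so this should return 0.5 -0.5 0.5 -0.5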

lizCenter

This function provides a wrapper around myCenter, allowing you to center a specific list of variables from a data frame.

x: a data frame.

listfname: a list of the names of the variables to be centered (e.g. list("variable1", "variable2")).

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with each column's previous name with ".ct" appended (e.g. "variable1" becomes "variable1.ct").

lizCenter= function(x, listfname) 
{
    for (i in 1:length(listfname)) 
    {
        fname = as.character(listfname[i])
        x[paste(fname,".ct", sep="")] = myCenter(x[fname])
    }
        
    return(x)
}
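For example (a hypothetical data frame with a single binary factor cond):

toy = data.frame(cond = factor(c("a", "a", "b", "b")))
toy = lizCenter(toy, list("cond"))
toy$cond.ct
# the factor is converted to numeric and centered: -0.5 -0.5 0.5 0.5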

lizContrasts

This function can be used to create two centered dummy variables which stand in place of a three-way factor (condition). This allows us to inspect each contrast separately, as well as its interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor.

The function takes a data frame (d), a factor from that data frame (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).

For example, if d is a data frame with a factor "condition" with three levels "lex_skew", "lex_noskew" and "mixed", then lizContrasts(d, d$condition, "lex_noskew") returns a data frame with two (numeric) columns added, labelled "lex_noskew_VERSUS_lex_skew" and "lex_noskew_VERSUS_mixed". Wherever you would normally use "condition" in a formula in an lme, it can be replaced by (lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_mixed), e.g. ~ (a * condition) becomes ~ (a * (lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_mixed)).

lizContrasts = function(d, condition, baselevel) 
{
    condition = factor(condition)
    condition = relevel(condition, baselevel)

    a = (contrasts(condition) - apply(contrasts(condition), 2, mean))
    d$dummy1[condition == rownames(a)[1]] <- a[1]
    d$dummy1[condition == rownames(a)[2]] <- a[2]
    d$dummy1[condition == rownames(a)[3]] <- a[3]
    d$dummy2[condition == rownames(a)[1]] <- a[4]
    d$dummy2[condition == rownames(a)[2]] <- a[5]
    d$dummy2[condition == rownames(a)[3]] <- a[6]

    name1 = paste(baselevel, rownames(a)[2], sep="_VERSUS_")
    name2 = paste(baselevel, rownames(a)[3], sep="_VERSUS_")

    d[name1] = d$dummy1
    d[name2] = d$dummy2

    d$dummy1 <- NULL
    d$dummy2 <- NULL

    return(d)
}
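A minimal sketch of what this produces (toy three-level factor with equal numbers of rows per level, so the added dummies are exactly centered):

toy = data.frame(cond = rep(c("lex_noskew", "lex_skew", "mixed"), each = 2))
toy = lizContrasts(toy, toy$cond, "lex_noskew")
names(toy)
# adds "lex_noskew_VERSUS_lex_skew" and "lex_noskew_VERSUS_mixed"
colMeans(toy[, 2:3])
# both dummy columns average to zero, so other effects are evaluated at the grand mean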

lizContrasts4

This function is a version of the previous lizContrasts function, but it creates three centered dummy variables which stand in place of a four-way factor (condition).

lizContrasts4 = function(d, condition, baselevel) 
{
    condition = factor(condition)
    condition = relevel(condition, baselevel)

    a = (contrasts(condition) - apply(contrasts(condition), 2, mean))
    d$dummy1[condition == rownames(a)[1]] <- a[1]
    d$dummy1[condition == rownames(a)[2]] <- a[2]
    d$dummy1[condition == rownames(a)[3]] <- a[3]
    d$dummy1[condition == rownames(a)[4]] <- a[4]
    d$dummy2[condition == rownames(a)[1]] <- a[5]
    d$dummy2[condition == rownames(a)[2]] <- a[6]
    d$dummy2[condition == rownames(a)[3]] <- a[7]
    d$dummy2[condition == rownames(a)[4]] <- a[8]
    d$dummy3[condition == rownames(a)[1]] <- a[9]
    d$dummy3[condition == rownames(a)[2]] <- a[10]
    d$dummy3[condition == rownames(a)[3]] <- a[11]
    d$dummy3[condition == rownames(a)[4]] <- a[12]

    name1 = paste(baselevel, rownames(a)[2], sep="_VERSUS_")
    name2 = paste(baselevel, rownames(a)[3], sep="_VERSUS_")
    name3 = paste(baselevel, rownames(a)[4], sep="_VERSUS_")

    d[name1] = d$dummy1
    d[name2] = d$dummy2
    d[name3] = d$dummy3

    d$dummy1 <- NULL
    d$dummy2 <- NULL
    d$dummy3 <- NULL

    return(d)
}
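The same sanity check works for the four-level version (again toy data with made-up level names):

toy4 = data.frame(cond = rep(c("base", "b", "c", "d"), each = 2))
toy4 = lizContrasts4(toy4, toy4$cond, "base")
names(toy4)
# adds "base_VERSUS_b", "base_VERSUS_c" and "base_VERSUS_d"
round(colMeans(toy4[, grep("VERSUS", names(toy4))]), 10)
# all zero with equal group sizes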

Bf

This function is a version of the Dienes (2008) calculator. It can be found at this website: http://www.lifesci.sussex.ac.uk/home/Zoltan_Dienes/inference/Bf%20with%20t%20likelihood.html

From that website:

The following R code is based on the Baguley and Kaye (2010) R code for the Dienes (2008) calculator. The likelihood function for the data has been changed from a Normal distribution to a t-distribution. This is a way of accounting for the variance of observations being unknown in advance of the data - thus no correction factor need be applied to the SE when this calculator is used.

Bf<-function(sd, obtained, dfdata, uniform, lower=0, upper=1, meanoftheory=0, sdtheory=1, tail=2)
{
  area <- 0
  if(identical(uniform, 1)){
    theta <- lower
    range <- upper - lower
    incr <- range / 2000
    for (A in -1000:1000){
      theta <- theta + incr
      dist_theta <- 1 / range
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  }else{
    theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (A in -1000:1000){
      theta <- theta + incr
      dist_theta <- dnorm(theta, meanoftheory, sdtheory)
      if(identical(tail, 1)){
        if (theta <= 0){
          dist_theta <- 0
        } else {
          dist_theta <- dist_theta * 2
        }
      }
      height <- dist_theta * dt((obtained-theta)/sd, df=dfdata)
      area <- area + height * incr
    }
  }
  LikelihoodTheory <- area
  Likelihoodnull <- dt(obtained/sd, df = dfdata)
  BayesFactor <- LikelihoodTheory / Likelihoodnull
  ret <- list("LikelihoodTheory" = LikelihoodTheory, "Likelihoodnull" = Likelihoodnull, BayesFactor)
    ret
  
}
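An illustrative call (the numbers here are arbitrary, not from any analysis below): an obtained effect of 2 with SE 1 and 28 degrees of freedom, tested against a half-normal prior (tail = 1) with SD 2:

Bf(sd = 1, obtained = 2, dfdata = 28, uniform = 0,
   meanoftheory = 0, sdtheory = 2, tail = 1)
# returns LikelihoodTheory, Likelihoodnull and, as the (unnamed) third element,
# the Bayes factor for the theory over the null (hence the [3] indexing used below)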

Bf_powercalc

This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, and a value for the predicted mean, which is set via sdtheory with meanoftheory = 0), plus the current number of participants N. However, rather than returning the BF for the current sample, it works out what the BF would be for a range of different sample sizes between min and max (assuming that the SE scales with sqrt(N)).

Bf_powercalc<-function(sd, obtained, dfdata, uniform, lower=0, upper=1, meanoftheory=0, sdtheory=1, tail=2, N, min, max)
{
 
  x = c(0)
  y = c(0)
  dfadjust = N - dfdata
  # note: this works out the difference between N and df (for a contrast between two
  # groups this is 2; for contrasts where there are 4 groups it will be 3, etc.)

  for(newN in min : max)
  {
    B = as.numeric(Bf(sd = sd*sqrt(N/newN), obtained, dfdata = newN-dfadjust, uniform, lower, upper, meanoftheory, sdtheory, tail)[3])
    x= append(x,newN)   
    y= append(y,B)
    output = cbind(x,y)
    
  }  
  output = output[-1,]  
  return(output) 
  
}
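For example (again with arbitrary numbers), to see how the BF would develop if the same effect and variability were observed with anywhere between 10 and 100 participants, given a current N of 30:

out = Bf_powercalc(sd = 1, obtained = 2, dfdata = 28, uniform = 0,
                   meanoftheory = 0, sdtheory = 2, tail = 1,
                   N = 30, min = 10, max = 100)
plot(out)       # column x is the hypothetical N, column y the corresponding BF
abline(h = 3)   # conventional threshold for substantial evidence for H1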

Load dataset

The data file contains data for both input nouns and minimal exposure nouns. Analyses were conducted with non-alternating nouns only, and therefore productions with alternating nouns have been removed from this data set. The data have also been filtered to remove trials not meeting the baseline criteria described in the paper (baseline criteria: trials were excluded if children had initially used an incorrect noun and been corrected by the experimenter, if they inserted an alternative word for a particle, or if they failed to include a particle; children were not penalized for omitting to repeat the initial "moop").

alldata = read.csv("skewdata_Aug2016.csv")

Experiment 1 (children), input nouns

select appropriate data

exp1.child.trained = subset(alldata, agegroup == "child" & old_me == "old")

check proportion of missing data

For the lexical-skew and lexical-noskew conditions, the number of possible trials contributing to the analysis is 15 children * 4 nouns * 4 productions per noun, making 240 trials; for the mixed condition it's 15 children * 2 nouns * 4 productions per noun, making 120 trials. There are no trials for the generalist condition. The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.child.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                  0.05       0.03       0.02

get the means

means for table 3

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition      N  correct    sd    se    ci
lex_noskew    15     0.74  0.16  0.04  0.09
lex_skew      15     0.96  0.06  0.02  0.03
mixed         15     0.70  0.26  0.07  0.14

Separating out the nouns with the majority/minority particle in the lexicalist-skewed condition (note: majority = 1, minority = 0).

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det   N  correct    sd    se    ci
0             15     0.90  0.23  0.06  0.13
1             15     0.99  0.03  0.01  0.02

statistical analyses

contrast between conditions 1

lme analysis all data

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.child.trained= lizContrasts(exp1.child.trained, exp1.child.trained$condition, "lex_noskew") 
exp1.child.trained = lizCenter(exp1.child.trained, list("correct_det"))

run the lmer model:

child.trained.lmer = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)

kable(summary(child.trained.lmer)$coefficients, 
digits = 3)
                                           Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                   2.413       0.328    7.350     0.000
lex_noskew_VERSUS_mixed                      -0.050       0.579   -0.087     0.931
lex_noskew_VERSUS_lex_skew                    3.114       0.741    4.204     0.000
correct_det.ct                               -0.348       0.706   -0.492     0.622
lex_noskew_VERSUS_mixed:correct_det.ct        0.124       1.327    0.093     0.926
lex_noskew_VERSUS_lex_skew:correct_det.ct     0.705       1.600    0.440     0.660

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_child_trained_MEANDIFF
## [1] 0.05044683
lex_noskew_VERSUS_mixed_child_trained_SE = abs(summary(child.trained.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_child_trained_SE 
## [1] 0.5792097
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF
## [1] 3.11429
lex_noskew_VERSUS_lex_skew_child_trained_SE = abs(summary(child.trained.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_child_trained_SE
## [1] 0.7407162

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_mixed_child_trained_SE , obtained=lex_noskew_VERSUS_mixed_child_trained_MEANDIFF  , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.08815955
## 
## $Likelihoodnull
## [1] 0.3938469
## 
## [[3]]
## [1] 0.2238422
Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1142007
## 
## $Likelihoodnull
## [1] 0.0003274736
## 
## [[3]]
## [1] 348.7324

comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

select the subset of data from the skewed language and center (i) the variable of interest (majority_det, which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.

exp1.child.trained.skew = subset(exp1.child.trained, condition == "lex_skew")
exp1.child.trained.skew= lizCenter(exp1.child.trained.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants. The model with the interaction with the control variable and full random slopes structure didn’t converge and was systematically simplified.

Non-converging models:

child.trained.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

child.trained.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct + correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

child.trained.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct |participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

Converging model:

child.trained.skew.lmer = glmer(correct ~  (majority_det.ct + correct_det.ct) +  (majority_det.ct |participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)

kable(summary(child.trained.skew.lmer)$coefficients, 
      digits = 3)
                 Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)         5.610       1.475    3.803     0.000
majority_det.ct    -1.855       3.666   -0.506     0.613
correct_det.ct     -2.525       2.299   -1.098     0.272

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

majority_VERSUS_minority_child_trained_MEANDIFF = abs(summary(child.trained.skew.lmer)$coefficients[2])
majority_VERSUS_minority_child_trained_MEANDIFF 
## [1] 1.85548
majority_VERSUS_minority_child_trained_SE = abs(summary(child.trained.skew.lmer)$coefficients[5])
majority_VERSUS_minority_child_trained_SE 
## [1] 3.666009

We now use these values in the Bf calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=majority_VERSUS_minority_child_trained_SE , obtained=majority_VERSUS_minority_child_trained_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.371252
## 
## $Likelihoodnull
## [1] 0.3464833
## 
## [[3]]
## [1] 1.071486

contrast between conditions 2

lme analysis - balanced data set

Remove two of the three majority particle nouns (specifically, those coded as 3 and 4) from the lex_skew condition. This will leave just two nouns - coded 1 and 2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.

exp1.child.trained$remove = 0
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun3" ]=1
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun4" ]=1
exp1.child.trained.balanced = subset(exp1.child.trained, remove == 0)

table(exp1.child.trained.balanced$noun, exp1.child.trained.balanced$condition)

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.child.trained.balanced= lizContrasts(exp1.child.trained.balanced, exp1.child.trained.balanced$condition, "lex_noskew") 
exp1.child.trained.balanced = lizCenter(exp1.child.trained.balanced, list("correct_det"))

run the lmer model:

child.trained.lmer.balanced = glmer(correct ~  (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.balanced)

kable(summary(child.trained.lmer.balanced)$coefficients, 
      digits = 3)
                                           Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                   2.394       0.368    6.512     0.000
lex_noskew_VERSUS_mixed                      -0.033       0.626   -0.053     0.957
lex_noskew_VERSUS_lex_skew                    3.004       0.838    3.585     0.000
correct_det.ct                                0.092       0.773    0.119     0.906
lex_noskew_VERSUS_mixed:correct_det.ct        0.181       1.389    0.130     0.896
lex_noskew_VERSUS_lex_skew:correct_det.ct     1.714       1.759    0.975     0.330

BF analyses corresponding to lme analysis with balanced data set

For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error; these are taken from the model above:

lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF = abs(summary(child.trained.lmer.balanced)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF
## [1] 3.004239
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE = abs(summary(child.trained.lmer.balanced)$coefficients[9])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE
## [1] 0.8378964

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1345978
## 
## $Likelihoodnull
## [1] 0.0016508
## 
## [[3]]
## [1] 81.53485

Experiment 1 (children), minimal exposure nouns

select appropriate data

exp1.child.me = subset(alldata, agegroup == "child" & old_me == "m.e.")

check proportion of missing data

In each condition, the number of possible trials contributing to the analysis is 15 children * 2 nouns * 4 productions per noun making 120 trials. The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.child.me$condition)/c(15*2*4,15*2*4,15*2*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##       0.05       0.05       0.06       0.05

get the means

means for table 1

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition      N  correct    sd    se    ci
gen_noskew    15     0.58  0.27  0.07  0.15
lex_noskew    15     0.64  0.30  0.08  0.17
lex_skew      15     0.86  0.22  0.06  0.12
mixed         15     0.58  0.17  0.04  0.10

Separating out the nouns with the majority/minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det   N  correct    sd    se    ci
0             15     0.77  0.42  0.11  0.23
1             15     0.92  0.22  0.06  0.12

statistical analyses

contrast between conditions

lme analysis

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.child.me= lizContrasts4(exp1.child.me, exp1.child.me$condition, "lex_noskew") 
exp1.child.me = lizCenter(exp1.child.me, list("correct_det"))

run the lmer model:

child.me.lmer = glmer(correct ~  (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)

kable(summary(child.me.lmer)$coefficients, 
      digits = 3)

                                             Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                     1.637       0.355    4.607     0.000
lex_noskew_VERSUS_mixed                        -0.488       0.821   -0.595     0.552
lex_noskew_VERSUS_lex_skew                      2.664       0.926    2.876     0.004
lex_noskew_VERSUS_gen_noskew                   -0.655       0.821   -0.798     0.425
correct_det.ct                                 -0.364       0.792   -0.460     0.646
lex_noskew_VERSUS_mixed:correct_det.ct          0.379       1.927    0.197     0.844
lex_noskew_VERSUS_lex_skew:correct_det.ct      -0.537       2.102   -0.256     0.798
lex_noskew_VERSUS_gen_noskew:correct_det.ct    -0.790       1.933   -0.409     0.683

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_child_me_MEANDIFF
## [1] 0.4882844
lex_noskew_VERSUS_mixed_child_me_SE = abs(summary(child.me.lmer)$coefficients[10])
lex_noskew_VERSUS_mixed_child_me_SE
## [1] 0.8213186
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF
## [1] 2.663743
lex_noskew_VERSUS_lex_skew_child_me_SE = abs(summary(child.me.lmer)$coefficients[11])
lex_noskew_VERSUS_lex_skew_child_me_SE
## [1] 0.9260443
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients[4])
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF
## [1] 0.6548326
lex_noskew_VERSUS_gen_noskew_child_me_SE = abs(summary(child.me.lmer)$coefficients[12])
lex_noskew_VERSUS_gen_noskew_child_me_SE
## [1] 0.8208123

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=lex_noskew_VERSUS_mixed_child_me_MEANDIFF  , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1603362
## 
## $Likelihoodnull
## [1] 0.3296401
## 
## [[3]]
## [1] 0.4863978
Bf(sd=lex_noskew_VERSUS_lex_skew_child_me_SE, obtained=lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.165898
## 
## $Likelihoodnull
## [1] 0.009261317
## 
## [[3]]
## [1] 17.91301
Bf(sd=lex_noskew_VERSUS_gen_noskew_child_me_SE, obtained=lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1718672
## 
## $Likelihoodnull
## [1] 0.2854262
## 
## [[3]]
## [1] 0.6021424
BF power calculation for minimal exposure lexical_unskewed versus mixed contrast

We found no substantial preference for either H1 over H0, or H0 over H1. We now consider how many more participants we might need in order to have evidence in each direction.

First, assume the ACTUAL mean difference is ZERO: with this amount of variance, approximately how many participants would we need to have substantial evidence for the NULL (given that the standard error scales with the number of participants)?

x=Bf_powercalc(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=0  , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600) 
plot(x)
abline(h=(1/3))

x[12:14,]                 
##       x         y
## [1,] 21 0.3398171
## [2,] 22 0.3328486
## [3,] 23 0.3262900

suggests we need an N of 22 (note that this is LESS than the current N of 30)
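As a sanity check on the scaling logic (a sketch; this call is not part of the original analysis), the y value in the N = 22 row can be reproduced by rescaling the observed standard error by sqrt(30/22) and calling Bf directly with the correspondingly adjusted degrees of freedom:

Bf(sd = lex_noskew_VERSUS_mixed_child_me_SE * sqrt(30/22), obtained = 0,
   dfdata = 22 - 2, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail = 1)
# the third element should match the y value above (approximately 0.333)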

Second, assume the ACTUAL mean difference is the current estimate: with this amount of variance, approximately how many participants would we need to have substantial evidence for H1 (given that the standard error scales with the number of participants)?

x=Bf_powercalc(sd=lex_noskew_VERSUS_lex_skew_child_me_SE, obtained=lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600)  
plot(x)
abline(h=(3))

x[514:516,]                 
##        x            y
## [1,] 523 5.099459e+26
## [2,] 524 5.754409e+26
## [3,] 525 6.493489e+26

suggests we need an N of 524 (i.e. 262 per condition)

comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

select the subset of data from the skewed language and center (i) the variable of interest (majority_det, which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.

exp1.child.me.skew = subset(exp1.child.me, condition == "lex_skew")
exp1.child.me.skew= lizCenter(exp1.child.me.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants. The model with the interaction with the control variable and full random slopes structure didn’t converge and was systematically simplified.

Non-converging models:

child.me.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct ) +  (majority_det.ct* correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

child.me.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct ) +  (majority_det.ct+ correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

Converging model:

child.me.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct ) +  (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)

kable(summary(child.me.skew.lmer)$coefficients, 
digits = 3)
                                Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                        9.636       2.448    3.936     0.000
majority_det.ct                   -2.186       4.751   -0.460     0.646
correct_det.ct                    -0.712       3.292   -0.216     0.829
majority_det.ct:correct_det.ct     1.650       6.361    0.259     0.795

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

majority_VERSUS_minority_child_me_MEANDIFF = abs(summary(child.me.skew.lmer)$coefficients[2])
majority_VERSUS_minority_child_me_MEANDIFF 
## [1] 2.185635
majority_VERSUS_minority_child_me_SE = abs(summary(child.me.skew.lmer)$coefficients[6])
majority_VERSUS_minority_child_me_SE 
## [1] 4.751256

We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=majority_VERSUS_minority_child_me_SE , obtained=majority_VERSUS_minority_child_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.3774785
## 
## $Likelihoodnull
## [1] 0.3545033
## 
## [[3]]
## [1] 1.064809

Experiment 1 (children), reported awareness of lexicality

means for table 4 (awareness of lexicality)

trained nouns (aware = 1, unaware = 0)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition   lexicallyaware   N  correct    sd    se    ci
lex_noskew               0  12     0.73  0.16  0.04  0.10
lex_noskew               1   3     0.75  0.22  0.12  0.54
lex_skew                 0   7     0.99  0.03  0.01  0.02
lex_skew                 1   8     0.94  0.07  0.03  0.06
mixed                    0  11     0.73  0.28  0.08  0.19
mixed                    1   4     0.63  0.20  0.10  0.32

minimal exposure nouns

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition   lexicallyaware   N  correct    sd    se    ci
gen_noskew               0  11     0.62  0.25  0.07  0.17
gen_noskew               1   4     0.46  0.34  0.17  0.55
lex_noskew               0  12     0.63  0.29  0.09  0.19
lex_noskew               1   3     0.67  0.38  0.22  0.95
lex_skew                 0   7     0.82  0.24  0.09  0.22
lex_skew                 1   8     0.89  0.21  0.07  0.17
mixed                    0  11     0.61  0.19  0.06  0.13
mixed                    1   4     0.48  0.04  0.02  0.06

Experiment 2 (adults), trained nouns

select appropriate data

exp1.adult.trained = subset(alldata, agegroup == "adult" & old_me == "old")

check proportion of missing data

For the lexical-skew and lexical-noskew conditions, the number of possible trials contributing to the analysis is 15 adults * 4 nouns * 4 productions per noun making 240 trials; for the mixed condition it’s 15 adults * 2 nouns * 4 productions per noun making 120 trials. There was no generalist condition for adults. The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.adult.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                     0          0          0

get the means

means for table 3

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition      N  correct    sd    se    ci
lex_noskew    15     0.78  0.24  0.06  0.13
lex_skew      15     0.93  0.14  0.04  0.08
mixed         15     0.61  0.17  0.04  0.09

Separating out the nouns with the majority/minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det   N  correct    sd    se    ci
0             15     0.88  0.27  0.07  0.15
1             15     0.94  0.12  0.03  0.06

statistical analyses

contrast between conditions 1

lme analysis all data

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.adult.trained= lizContrasts(exp1.adult.trained, exp1.adult.trained$condition, "lex_noskew") 
exp1.adult.trained = lizCenter(exp1.adult.trained, list("correct_det"))

run the lmer model:

adult.trained.lmer = glmer(correct ~  (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained)

kable(summary(adult.trained.lmer)$coefficients, 
      digits = 3)

                                           Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                   1.996       0.304    6.557     0.000
lex_noskew_VERSUS_mixed                      -1.296       0.617   -2.099     0.036
lex_noskew_VERSUS_lex_skew                    1.759       0.691    2.544     0.011
correct_det.ct                               -0.220       0.406   -0.541     0.589
lex_noskew_VERSUS_mixed:correct_det.ct       -0.447       0.625   -0.715     0.475
lex_noskew_VERSUS_lex_skew:correct_det.ct    -0.948       0.787   -1.205     0.228

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF
## [1] 1.295501
lex_noskew_VERSUS_mixed_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_adult_trained_SE
## [1] 0.6171106
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF
## [1] 1.759348
lex_noskew_VERSUS_lex_skew_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_trained_SE
## [1] 0.6914691

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_mixed_adult_trained_SE , obtained=lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF  , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.15276
## 
## $Likelihoodnull
## [1] 0.04748425
## 
## [[3]]
## [1] 3.217067
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1584024
## 
## $Likelihoodnull
## [1] 0.01937472
## 
## [[3]]
## [1] 8.175725

comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

select the subset of data from the skewed language and center (i) the variable of interest (majority_det, which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.

exp1.adult.trained.skew = subset(exp1.adult.trained, condition == "lex_skew")
exp1.adult.trained.skew= lizCenter(exp1.adult.trained.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.

adult.trained.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.skew)


kable(summary(adult.trained.skew.lmer)$coefficients, 
      digits = 3)
                                Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                        6.231       2.209    2.821     0.005
majority_det.ct                    2.426       3.392    0.715     0.474
correct_det.ct                     4.205       4.345    0.968     0.333
majority_det.ct:correct_det.ct     3.940       7.532    0.523     0.601

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

majority_VERSUS_minority_adult_trained_MEANDIFF = abs(summary(adult.trained.skew.lmer)$coefficients[2])
majority_VERSUS_minority_adult_trained_MEANDIFF
## [1] 2.426323
majority_VERSUS_minority_adult_trained_SE = abs(summary(adult.trained.skew.lmer)$coefficients[6])
majority_VERSUS_minority_adult_trained_SE
## [1] 3.391853

We now use these values in the Bf calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=majority_VERSUS_minority_adult_trained_SE , obtained=majority_VERSUS_minority_adult_trained_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.3596668
## 
## $Likelihoodnull
## [1] 0.3040798
## 
## [[3]]
## [1] 1.182804

contrast between conditions 2

lme analysis - balanced data set

Remove two of the three majority particle nouns (specifically, those coded as 3 and 4) from the lex_skew condition. This will leave just two nouns - coded 1 and 2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.

exp1.adult.trained$remove = 0
exp1.adult.trained$remove[exp1.adult.trained$condition == "lex_skew" & exp1.adult.trained$noun == "trainednoun3" ]=1
exp1.adult.trained$remove[exp1.adult.trained$condition == "lex_skew" & exp1.adult.trained$noun == "trainednoun4" ]=1
exp1.adult.trained.balanced = subset(exp1.adult.trained, remove == 0)

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.adult.trained.balanced= lizContrasts(exp1.adult.trained.balanced, exp1.adult.trained.balanced$condition, "lex_noskew") 
exp1.adult.trained.balanced = lizCenter(exp1.adult.trained.balanced, list("correct_det"))

run the lmer model:

adult.trained.lmer.balanced = glmer(correct ~  (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.balanced)

kable(summary(adult.trained.lmer.balanced)$coefficients, 
      digits = 3)

                                           Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                   2.086       0.331    6.294     0.000
lex_noskew_VERSUS_mixed                      -1.241       0.565   -2.196     0.028
lex_noskew_VERSUS_lex_skew                    2.149       0.808    2.660     0.008
correct_det.ct                               -0.665       0.529   -1.258     0.209
lex_noskew_VERSUS_mixed:correct_det.ct       -0.322       0.660   -0.487     0.626
lex_noskew_VERSUS_lex_skew:correct_det.ct    -1.955       1.262   -1.549     0.121

BF analyses corresponding to lme analysis with balanced data set

For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error; these are taken from the model above:

lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF = abs(summary(adult.trained.lmer.balanced)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF
## [1] 2.148548
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE = abs(summary(adult.trained.lmer.balanced)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE
## [1] 0.8077

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1679098
## 
## $Likelihoodnull
## [1] 0.01507205
## 
## [[3]]
## [1] 11.14048

Experiment 2 (adults), minimal exposure nouns

select appropriate data

exp1.adult.me = subset(alldata, agegroup == "adult" & old_me == "m.e.")

check proportion of missing data

In each condition, the number of possible trials contributing to the analysis is 15 adults * 2 nouns * 4 productions per noun, making 120 trials. There is no generalist condition for adults. The proportion of missing trials in each condition is therefore:

1-(round(table(exp1.adult.me$condition)/c(0,15*2*4,15*2*4,15*2*4),2)) 
## 
## gen_noskew lex_noskew   lex_skew      mixed 
##                  0.00       0.00       0.01

get the means

means for table 1

kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition      N  correct    sd    se    ci
lex_noskew    15     0.89  0.27  0.07  0.15
lex_skew      15     0.97  0.13  0.03  0.07
mixed         15     0.84  0.26  0.07  0.14

Separating out the nouns with the majority/minority particle in the lexicalist-skewed condition.

kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det   N  correct    sd    se    ci
0             15     1.00  0.00  0.00  0.00
1             15     0.93  0.26  0.07  0.14

statistical analyses

contrast between conditions

lme analysis

set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.

exp1.adult.me= lizContrasts(exp1.adult.me, exp1.adult.me$condition, "lex_noskew") 
exp1.adult.me = lizCenter(exp1.adult.me, list("correct_det"))

run the lmer model:

adult.me.lmer = glmer(correct ~  (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me)

kable(summary(adult.me.lmer)$coefficients, 
      digits = 3)

                                           Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                                  12.843      14.428    0.890     0.373
lex_noskew_VERSUS_mixed                      -0.704       1.991   -0.354     0.724
lex_noskew_VERSUS_lex_skew                   11.826      43.104    0.274     0.784
correct_det.ct                                7.554      28.962    0.261     0.794
lex_noskew_VERSUS_mixed:correct_det.ct       -0.150       3.524   -0.043     0.966
lex_noskew_VERSUS_lex_skew:correct_det.ct    22.292      86.282    0.258     0.796

BF analyses corresponding to lme analysis all data

For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:

lex_noskew_VERSUS_mixed_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_adult_me_MEANDIFF
## [1] 0.7039988
lex_noskew_VERSUS_mixed_adult_me_SE = abs(summary(adult.me.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_adult_me_SE
## [1] 1.991358
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF
## [1] 11.82603
lex_noskew_VERSUS_lex_skew_adult_me_SE = abs(summary(adult.me.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_me_SE
## [1] 43.10405

We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=lex_noskew_VERSUS_mixed_adult_me_SE , obtained=lex_noskew_VERSUS_mixed_adult_me_MEANDIFF  , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2781582
## 
## $Likelihoodnull
## [1] 0.3706698
## 
## [[3]]
## [1] 0.7504205
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_me_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.3858014
## 
## $Likelihoodnull
## [1] 0.3803004
## 
## [[3]]
## [1] 1.014465

comparing accuracy with majority and minority particle nouns within skewed language

lme analysis skewed data

select the subset of data from the skewed language and center (i) the variable of interest (majority_det, which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.

exp1.adult.me.skew = subset(exp1.adult.me, condition == "lex_skew")
exp1.adult.me.skew= lizCenter(exp1.adult.me.skew, list("correct_det", "majority_det"))

Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.

adult.me.skew.lmer = glmer(correct ~  (majority_det.ct * correct_det.ct) +  (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me.skew)

kable(summary(adult.me.skew.lmer)$coefficients, 
      digits = 3)
                                Estimate  Std. Error  z value  Pr(>|z|)
(Intercept)                       26.934      16.232    1.659     0.097
majority_det.ct                  -12.148      28.535   -0.426     0.670
correct_det.ct                     0.139      29.639    0.005     0.996
majority_det.ct:correct_det.ct    23.984      25.044    0.958     0.338

BF analysis corresponding to lme analysis for skewed data (comparing accuracy with majority and minority particle nouns within skewed language)

We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:

majority_VERSUS_minority_adult_me_MEANDIFF = abs(summary(adult.me.skew.lmer)$coefficients[2])
majority_VERSUS_minority_adult_me_MEANDIFF
## [1] 12.14758
majority_VERSUS_minority_adult_me_SE = abs(summary(adult.me.skew.lmer)$coefficients[6])
majority_VERSUS_minority_adult_me_SE
## [1] 28.53479

We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott (2011)).

Bf(sd=majority_VERSUS_minority_adult_me_SE , obtained=majority_VERSUS_minority_adult_me_MEANDIFF , dfdata = 28, uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.3646299
## 
## $Likelihoodnull
## [1] 0.3600854
## 
## [[3]]
## [1] 1.012621

Experiment 2 (adults), reported awareness of lexicality

means for table 5 (awareness of lexicality)

trained nouns (aware = 1, unaware = 0)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition   lexicallyaware   N  correct    sd    se    ci
lex_noskew               1  15     0.78  0.24  0.06  0.13
lex_skew                 0   1     1.00    NA    NA    NA
lex_skew                 1  14     0.92  0.14  0.04  0.08
mixed                    0   6     0.60  0.18  0.08  0.19
mixed                    1   9     0.61  0.17  0.06  0.13

minimal exposure nouns

means for table 5 (awareness of lexicality)

kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition   lexicallyaware   N  correct    sd    se    ci
lex_noskew               1  15     0.89  0.27  0.07  0.15
lex_skew                 0   1     1.00    NA    NA    NA
lex_skew                 1  14     0.96  0.13  0.04  0.08
mixed                    0   6     0.81  0.35  0.14  0.37
mixed                    1   9     0.86  0.19  0.06  0.15

statistical comparison

Comparing the extent of reported awareness of lexical patterns in the different conditions using Fisher's exact test.

awaretable = with(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me), 
                  table(condition, lexicallyaware))

kable(awaretable)
              0   1
gen_noskew    0   0
lex_noskew    0  15
lex_skew      1  14
mixed         6   9
# mixed versus lexicalist-unskewed
fisher.test(rbind(awaretable[2,], awaretable[4,]))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  rbind(awaretable[2, ], awaretable[4, ])
## p-value = 0.01686
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.6740343
## sample estimates:
## odds ratio 
##          0
# mixed versus lexicalist-skewed
fisher.test(rbind(awaretable[3,], awaretable[4,]))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  rbind(awaretable[3, ], awaretable[4, ])
## p-value = 0.08008
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.002183889 1.190995284
## sample estimates:
## odds ratio 
##  0.1151766