rm(list=ls())
library(ggplot2)
library(plotrix)
#suppressPackageStartupMessages(library(dplyr))
library(plyr)
#library(bootstrap)
suppressPackageStartupMessages(library(lme4))
library(Kendall)
## Warning: package 'Kendall' was built under R version 3.3.2
library(knitr)
library(reshape2)
theme_set(theme_bw())
opts_chunk$set(fig.width=8, fig.height=5,
echo=TRUE, warning=FALSE, message=FALSE, cache=TRUE)
This function can be found on the website “Cookbook for R”.
http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper
It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
# Summarise one measured variable within groups.
#
# Arguments:
#   data          data frame holding the measurements.
#   measurevar    name (string) of the column to summarise.
#   groupvars     character vector of grouping column names (may be NULL).
#   na.rm         if TRUE, NAs are ignored in the count, mean and sd.
#   conf.interval coverage of the confidence interval (default 95%).
#   .drop         passed on to plyr::ddply().
#
# Returns a data frame with one row per group containing the grouping
# columns, N (count), a column named after `measurevar` (the group mean),
# sd, se (standard error of the mean) and ci (half-width of the confidence
# interval).
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                      conf.interval = .95, .drop = TRUE) {
  # Fail early with a clear message instead of relying on require(), which
  # only returns FALSE when the package is missing.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("summarySE() requires the 'plyr' package", call. = FALSE)
  }

  # Version of length() that can skip NAs: if na.rm is TRUE, don't count them.
  count_values <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # For each group's data frame, return a vector with N, mean, and sd.
  # plyr functions are namespace-qualified so that same-named functions from
  # other attached packages cannot mask them.
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
    .fun = function(xx) {
      values <- xx[[measurevar]]
      c(N = count_values(values, na.rm = na.rm),
        mean = mean(values, na.rm = na.rm),
        sd = sd(values, na.rm = na.rm))
    }
  )

  # Rename the "mean" column after the measured variable.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for the standard error: e.g. if
  # conf.interval is .95, use the .975 quantile of t with df = N - 1.
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}
This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger’s blog: https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.
From his blog:
If the input is a numeric variable, the output is the centered variable.
If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.
If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).
# Center a variable on its mean (NAs are ignored when computing the mean).
#
#   numeric input          -> x minus its mean.
#   factor input           -> the integer level codes minus their mean.
#   data frame / matrix    -> a data frame of the same dimensions in which
#                             every column has been centered by a recursive
#                             call; column names are the input names
#                             prefixed with "c".
#   anything else          -> NULL (invisibly).
#
# NOTE(review): is.numeric() is tested first, so a numeric *matrix* is handled
# by the numeric branch (centered on its grand mean and returned as a matrix)
# rather than column by column -- confirm whether that is intended.
myCenter <- function(x) {
  if (is.numeric(x)) {
    return(x - mean(x, na.rm = TRUE))
  }

  if (is.factor(x)) {
    codes <- as.numeric(x)
    return(codes - mean(codes, na.rm = TRUE))
  }

  if (is.data.frame(x) || is.matrix(x)) {
    n_cols <- ncol(x)
    centered <- matrix(nrow = nrow(x), ncol = n_cols)
    # paste() would yield a single "c" for zero columns, which cannot be
    # assigned as the names of an empty matrix, so only name real columns.
    if (n_cols > 0L) {
      colnames(centered) <- paste("c", colnames(x), sep = "")
    }
    # seq_len() (unlike 1:n_cols) iterates zero times for zero columns.
    for (i in seq_len(n_cols)) {
      centered[, i] <- myCenter(x[, i])
    }
    return(as.data.frame(centered))
  }

  invisible(NULL)
}
This function provides a wrapper around myCenter allowing you to center a specific list of variables from a data frame.
The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with each column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).
# Add centered copies of selected columns to a data frame.
#
# Arguments:
#   x          data frame.
#   listfname  list (or vector) of column names to center.
#
# Returns `x` with one extra column per requested name, called
# "<name>.ct" and filled with the result of myCenter() on that column.
# An empty `listfname` returns `x` unchanged.
lizCenter <- function(x, listfname) {
  # seq_along() iterates zero times for an empty list; 1:length() would
  # visit indices 1 and 0 and fail on a non-existent column.
  for (i in seq_along(listfname)) {
    fname <- as.character(listfname[i])
    x[paste(fname, ".ct", sep = "")] <- myCenter(x[fname])
  }
  return(x)
}
This function can be used to create two centered dummy variables which stand in place of a three-way factor (condition). This allows us to inspect each contrast separately, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor.
The function takes a data frame (d), a factor from that database (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).
For example, if d is a data frame with a factor “condition” with three levels (“lex_skew”, “lex_noskew”, “mixed”) then lizContrasts(d, d$condition, “lex_noskew”) returns a data frame with two (numeric) columns added, labelled “lex_noskew_VERSUS_lex_skew” and “lex_noskew_VERSUS_mixed”. Wherever you would normally use “condition” in a formula in an lme, it can be replaced by (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew), e.g. ~ (a * condition) becomes ~ (a * (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew)).
# Replace a factor by centered dummy (contrast) variables.
#
# Arguments:
#   d          data frame to which the contrast columns are added.
#   condition  the factor (one value per row of `d`), typically a column of
#              `d`; intended for a three-level factor.
#   baselevel  name of the level used as the baseline of the contrasts.
#
# Returns `d` with one numeric column per non-baseline level, named
# "<baselevel>_VERSUS_<level>". Each column holds that level's contrast
# coding (as given by contrasts()) minus the column mean of the coding.
# Rows whose condition is NA get NA.
#
# The columns are built directly from the contrast matrix, so no temporary
# "dummy1"/"dummy2" columns are written to (or removed from) `d`, and the
# result is also correct when the factor does not have exactly three levels.
lizContrasts <- function(d, condition, baselevel) {
  condition <- relevel(factor(condition), baselevel)

  # Contrast coding with every column centered on its own mean.
  coding <- contrasts(condition)
  centered <- sweep(coding, 2, apply(coding, 2, mean))

  level_names <- rownames(centered)
  # Row of the contrast matrix that applies to each observation.
  row_of_obs <- as.integer(condition)

  for (j in seq_len(ncol(centered))) {
    # Column j contrasts the baseline (row 1) with level j + 1.
    contrast_name <- paste(baselevel, level_names[j + 1], sep = "_VERSUS_")
    d[contrast_name] <- unname(centered[row_of_obs, j])
  }
  d
}
This function is a version of the previous lizContrasts function but to be used to create three centered dummy variables which stand in place of a four-way factor (condition).
# Four-level counterpart of lizContrasts: replace a factor by centered dummy
# (contrast) variables.
#
# Arguments:
#   d          data frame to which the contrast columns are added.
#   condition  the factor (one value per row of `d`), typically a column of
#              `d`; intended for a four-level factor.
#   baselevel  name of the level used as the baseline of the contrasts.
#
# Returns `d` with one numeric column per non-baseline level, named
# "<baselevel>_VERSUS_<level>". Each column holds that level's contrast
# coding (as given by contrasts()) minus the column mean of the coding.
# Rows whose condition is NA get NA.
#
# The columns are built directly from the contrast matrix, so no temporary
# "dummy1"/"dummy2"/"dummy3" columns are written to (or removed from) `d`,
# and the result is also correct when the factor does not have exactly four
# levels.
lizContrasts4 <- function(d, condition, baselevel) {
  condition <- relevel(factor(condition), baselevel)

  # Contrast coding with every column centered on its own mean.
  coding <- contrasts(condition)
  centered <- sweep(coding, 2, apply(coding, 2, mean))

  level_names <- rownames(centered)
  # Row of the contrast matrix that applies to each observation.
  row_of_obs <- as.integer(condition)

  for (j in seq_len(ncol(centered))) {
    # Column j contrasts the baseline (row 1) with level j + 1.
    contrast_name <- paste(baselevel, level_names[j + 1], sep = "_VERSUS_")
    d[contrast_name] <- unname(centered[row_of_obs, j])
  }
  d
}
This function is equivalent to the Dienes (2008) calculator which can be found here: http://www.lifesci.sussex.ac.uk/home/Zoltan_Dienes/inference/Bayes.htm.
The code was provided by Baguley and Kaye (2010) and can be found here: http://www.academia.edu/427288/Review_of_Understanding_psychology_as_a_science_An_introduction_to_scientific_and_statistical_inference
# Bayes factor for a theory (H1) against the point null of no effect (H0).
#
# Arguments:
#   sd            standard error of the obtained estimate.
#   obtained      the obtained estimate (e.g. a mean difference).
#   uniform       1 to represent the theory's predictions by a uniform
#                 distribution between `lower` and `upper`; any other value
#                 uses a normal distribution.
#   lower, upper  bounds of the uniform distribution.
#   meanoftheory  mean of the normal distribution.
#   sdtheory      standard deviation of the normal distribution.
#   tail          1 for a one-tailed normal (no mass at or below zero, density
#                 doubled above zero); any other value leaves it two-tailed.
#
# Returns a list with LikelihoodTheory (likelihood of `obtained` averaged over
# the theory's distribution, by numerical integration in 2001 steps),
# Likelihoodnull (likelihood of `obtained` under an effect of 0) and
# BayesFactor (their ratio).
#
# The `uniform` and `tail` flags are compared by value, so 1, 1L and TRUE are
# all accepted; a strict identical() test would silently treat 1L as "not 1".
Bf <- function(sd, obtained, uniform, lower = 0, upper = 1, meanoftheory = 0,
               sdtheory = 1, tail = 2) {
  use_uniform <- isTRUE(uniform == 1)
  one_tailed <- isTRUE(tail == 1)

  area <- 0
  if (use_uniform) {
    theta <- lower
    width <- upper - lower
    incr <- width / 2000
    # The uniform density is constant, so compute it once.
    dist_theta <- 1 / width
    # NOTE(review): 2001 steps of width/2000 starting at lower + incr, so the
    # last evaluation point is upper + incr; kept as in the original code.
    for (A in -1000:1000) {
      theta <- theta + incr
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  } else {
    # Integrate over roughly +/- 5 sdtheory around meanoftheory.
    theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (A in -1000:1000) {
      theta <- theta + incr
      dist_theta <- dnorm(theta, meanoftheory, sdtheory)
      if (one_tailed) {
        if (theta <= 0) {
          dist_theta <- 0
        } else {
          dist_theta <- dist_theta * 2
        }
      }
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  }

  LikelihoodTheory <- area
  Likelihoodnull <- dnorm(obtained, 0, sd)
  BayesFactor <- LikelihoodTheory / Likelihoodnull
  list("LikelihoodTheory" = LikelihoodTheory,
       "Likelihoodnull" = Likelihoodnull,
       "BayesFactor" = BayesFactor)
}
This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, and a value for the predicted mean, which is set to be sdtheory (with meanoftheory=0)), plus the current number of participants N. However, rather than return a BF for the current sample, it works out what the BF would be for a range of different subject numbers (assuming that the SE scales with 1/sqrt(N)).
# Bayes factor as a function of sample size.
#
# Takes the same arguments as Bf(), plus:
#   N         number of participants in the current sample (the one that
#             produced `sd`).
#   min, max  range of hypothetical sample sizes to evaluate (min:max).
#
# For every hypothetical sample size newN the standard error is rescaled to
# sd * sqrt(N / newN) and passed to Bf(); all other arguments are passed
# through unchanged.
#
# Returns a two-column numeric matrix with columns "x" (sample size) and "y"
# (Bayes factor), one row per sample size. The result is always a matrix,
# including when min == max.
#
# NOTE(review): the original code carried a remark about the difference
# between N and the degrees of freedom (2 for a two-group contrast, 3 with
# four groups, ...); no such correction is applied here -- confirm whether
# one is needed.
Bf_powercalc <- function(sd, obtained, uniform, lower = 0, upper = 1,
                         meanoftheory = 0, sdtheory = 1, tail = 2, N, min, max) {
  sample_sizes <- min:max

  # One Bayes factor per sample size; vapply() avoids growing vectors (and
  # rebuilding the matrix) on every iteration.
  bayes_factors <- vapply(
    sample_sizes,
    function(newN) {
      as.numeric(Bf(sd = sd * sqrt(N / newN), obtained, uniform, lower, upper,
                    meanoftheory, sdtheory, tail)[3])
    },
    numeric(1)
  )

  cbind(x = as.numeric(sample_sizes), y = bayes_factors)
}
The data file contains data for both input nouns and minimal exposure nouns.
data = read.csv("skewdata_Jan2017.csv")
Filter the data to produce the final dataset (“alldata”) for analyses.
# Build the analysis dataset in a single pass. A trial is kept only when:
#   - the noun is non-alternating (analyses use non-alternating nouns only),
#   - the participant used the correct noun,
#   - the participant did not produce an alternative word for a particle, and
#   - the participant produced a particle at all.
alldata <- subset(
  data,
  noun_type != "alternating" &
    noun_correct == "1" &
    det_used_coded != "other" &
    det_used_coded != "none"
)
Get the means for each participant.
aggregated.child.age.listsp = aggregate(cbind(age_months, listeningspanRAW, listeningspan) ~ participantid + condition, subset(alldata, agegroup == "child"), FUN=mean)
Get the values for Table 1.
kable(summarySE(aggregated.child.age.listsp, measurevar="age_months", groupvars = "condition"), digits = 2)
condition | N | age_months | sd | se | ci |
---|---|---|---|---|---|
gen_noskew | 15 | 74.27 | 4.10 | 1.06 | 2.27 |
lex_noskew | 15 | 73.60 | 5.05 | 1.30 | 2.80 |
lex_skew | 15 | 75.07 | 4.15 | 1.07 | 2.30 |
mixed | 15 | 72.47 | 5.66 | 1.46 | 3.13 |
kable(summarySE(aggregated.child.age.listsp, measurevar="listeningspan", groupvars = "condition"), digits = 2)
condition | N | listeningspan | sd | se | ci |
---|---|---|---|---|---|
gen_noskew | 15 | 107.27 | 20.78 | 5.37 | 11.51 |
lex_noskew | 15 | 106.80 | 20.00 | 5.16 | 11.08 |
lex_skew | 15 | 108.93 | 10.00 | 2.58 | 5.54 |
mixed | 15 | 102.33 | 15.06 | 3.89 | 8.34 |
Since our contrasts compare the lexicalist-unskewed condition against each of the other conditions, use t-tests to compare the mean age and listening span of this group against those of each of the other groups - no significant differences were found (note: for listening span, we use raw scores in statistical analyses).
# Age
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "lex_skew")$age_months)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "lex_skew")$age_months
## t = -0.86875, df = 26.975, p-value = 0.3926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.930801 1.997468
## sample estimates:
## mean of x mean of y
## 73.60000 75.06667
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "mixed")$age_months)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "mixed")$age_months
## t = 0.57873, df = 27.654, p-value = 0.5675
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.880318 5.146984
## sample estimates:
## mean of x mean of y
## 73.60000 72.46667
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months, subset(aggregated.child.age.listsp, condition == "gen_noskew")$age_months)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$age_months and subset(aggregated.child.age.listsp, condition == "gen_noskew")$age_months
## t = -0.39688, df = 26.849, p-value = 0.6946
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.114151 2.780818
## sample estimates:
## mean of x mean of y
## 73.60000 74.26667
# Listening Span
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "lex_skew")$listeningspanRAW)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "lex_skew")$listeningspanRAW
## t = -1.2292, df = 19.395, p-value = 0.2337
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.8804211 0.7470878
## sample estimates:
## mean of x mean of y
## 6.133333 7.200000
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "mixed")$listeningspanRAW)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "mixed")$listeningspanRAW
## t = 0.73102, df = 26.394, p-value = 0.4712
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.327215 2.793882
## sample estimates:
## mean of x mean of y
## 6.133333 5.400000
t.test(subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW, subset(aggregated.child.age.listsp, condition == "gen_noskew")$listeningspanRAW)
##
## Welch Two Sample t-test
##
## data: subset(aggregated.child.age.listsp, condition == "lex_noskew")$listeningspanRAW and subset(aggregated.child.age.listsp, condition == "gen_noskew")$listeningspanRAW
## t = -0.42477, df = 26.879, p-value = 0.6744
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.110134 2.043467
## sample estimates:
## mean of x mean of y
## 6.133333 6.666667
exp1.child.trained = subset(alldata, agegroup == "child" & old_me == "old")
For the lexicalist-skewed and lexicalist-unskewed conditions the number of possible trials contributing to the analysis is 15 children * 4 nouns * 4 productions per noun making 240 trials; for the mixed condition it’s 15 children * 2 nouns * 4 productions per noun making 120 trials. There are no trials for the generalist condition. The proportion of missing trials in each condition is therefore:
1-(round(table(exp1.child.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2))
##
## gen_noskew lex_noskew lex_skew mixed
## 0.05 0.03 0.02
kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
lex_noskew | 15 | 0.74 | 0.16 | 0.04 | 0.09 |
lex_skew | 15 | 0.96 | 0.06 | 0.02 | 0.03 |
mixed | 15 | 0.70 | 0.26 | 0.07 | 0.14 |
Separate out the nouns with the majority/ minority particle in the lexicalist-skewed condition (majority = 1, minority = 0).
kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det | N | correct | sd | se | ci |
---|---|---|---|---|---|
0 | 15 | 0.90 | 0.23 | 0.06 | 0.13 |
1 | 15 | 0.99 | 0.03 | 0.01 | 0.02 |
Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.
exp1.child.trained = lizContrasts(exp1.child.trained, exp1.child.trained$condition, "lex_noskew")
exp1.child.trained = lizCenter(exp1.child.trained, list("correct_det"))
run the lmer model:
child.trained.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control = glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)
kable(summary(child.trained.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.413 | 0.328 | 7.350 | 0.000 |
lex_noskew_VERSUS_mixed | -0.050 | 0.579 | -0.087 | 0.931 |
lex_noskew_VERSUS_lex_skew | 3.114 | 0.741 | 4.205 | 0.000 |
correct_det.ct | -0.348 | 0.706 | -0.493 | 0.622 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.124 | 1.327 | 0.093 | 0.926 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 0.705 | 1.600 | 0.440 | 0.660 |
For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:
# Absolute estimate and standard error for each contrast, looked up by row
# (model term) and column name rather than by position in the coefficient
# matrix, so the values stay correct if model terms are added or reordered.
lex_noskew_VERSUS_mixed_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Estimate"])
lex_noskew_VERSUS_mixed_child_trained_MEANDIFF
## [1] 0.05044866
lex_noskew_VERSUS_mixed_child_trained_SE = abs(summary(child.trained.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Std. Error"])
lex_noskew_VERSUS_mixed_child_trained_SE
## [1] 0.5791859
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF = abs(summary(child.trained.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF
## [1] 3.114288
lex_noskew_VERSUS_lex_skew_child_trained_SE = abs(summary(child.trained.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_child_trained_SE
## [1] 0.7406886
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011).
Bf(sd=lex_noskew_VERSUS_mixed_child_trained_SE , obtained=lex_noskew_VERSUS_mixed_child_trained_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1525399
##
## $Likelihoodnull
## [1] 0.6861903
##
## $BayesFactor
## [1] 0.2222996
Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.154154
##
## $Likelihoodnull
## [1] 7.806013e-05
##
## $BayesFactor
## [1] 1974.811
Select data from the skewed condition only, and center (i) the variable of interest (majority_det - which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.
exp1.child.trained.skew = subset(exp1.child.trained, condition == "lex_skew")
exp1.child.trained.skew = lizCenter(exp1.child.trained.skew, list("correct_det", "majority_det"))
Run an lme to see if participants are more accurate with the majority particle than the minority particle. Note that “majority_det” is within participants. The model with the interaction with the control variable and full random slopes structure did not converge and was systematically simplified.
Non-converging models:
# These fits did not converge and their result is replaced by the simplified
# model fitted next, so they are kept for the record but not run.
# child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)
# child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct + correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)
# child.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)
Converging model:
child.trained.skew.lmer = glmer(correct ~ (majority_det.ct + correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.skew)
kable(summary(child.trained.skew.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 5.610 | 1.476 | 3.802 | 0.000 |
majority_det.ct | -1.855 | 3.666 | -0.506 | 0.613 |
correct_det.ct | -2.525 | 2.299 | -1.098 | 0.272 |
We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:
# Absolute estimate and standard error of the majority_det.ct term, looked up
# by row and column name rather than by position in the coefficient matrix.
majority_VERSUS_minority_child_trained_MEANDIFF = abs(summary(child.trained.skew.lmer)$coefficients["majority_det.ct", "Estimate"])
majority_VERSUS_minority_child_trained_MEANDIFF
## [1] 1.855476
majority_VERSUS_minority_child_trained_SE = abs(summary(child.trained.skew.lmer)$coefficients["majority_det.ct", "Std. Error"])
majority_VERSUS_minority_child_trained_SE
## [1] 3.666387
We now use these values in the Bf calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott, 2011).
Bf(sd=majority_VERSUS_minority_child_trained_SE , obtained=majority_VERSUS_minority_child_trained_MEANDIFF , uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1023352
##
## $Likelihoodnull
## [1] 0.09573202
##
## $BayesFactor
## [1] 1.068976
Remove two of the three majority particle nouns (specifically, those coded as trainednoun3 and trainednoun4) from the lex_skew condition. This will leave just two nouns - coded as trainednoun1 and trainednoun2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.
exp1.child.trained$remove = 0
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun3" ]=1
exp1.child.trained$remove[exp1.child.trained$condition == "lex_skew" & exp1.child.trained$noun == "trainednoun4" ]=1
exp1.child.trained.balanced = subset(exp1.child.trained, remove == 0)
kable(summarySE(aggregate(correct ~ participantid + condition, FUN = mean, data = subset(exp1.child.trained.balanced, condition=="lex_skew")), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
lex_skew | 15 | 0.95 | 0.12 | 0.03 | 0.07 |
For the lme analysis, set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.
exp1.child.trained.balanced = lizContrasts(exp1.child.trained.balanced, exp1.child.trained.balanced$condition, "lex_noskew")
exp1.child.trained.balanced = lizCenter(exp1.child.trained.balanced, list("correct_det"))
Run the lmer model:
child.trained.lmer.balanced = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.balanced)
kable(summary(child.trained.lmer.balanced)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.394 | 0.368 | 6.512 | 0.000 |
lex_noskew_VERSUS_mixed | -0.033 | 0.626 | -0.053 | 0.957 |
lex_noskew_VERSUS_lex_skew | 3.004 | 0.838 | 3.586 | 0.000 |
correct_det.ct | 0.092 | 0.773 | 0.119 | 0.906 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.181 | 1.388 | 0.130 | 0.896 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 1.714 | 1.759 | 0.975 | 0.330 |
For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error and this is taken from the model above.
# Absolute estimate and standard error of the lex_noskew vs lex_skew contrast,
# looked up by row and column name rather than by position in the coefficient
# matrix.
lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF = abs(summary(child.trained.lmer.balanced)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF
## [1] 3.004237
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE = abs(summary(child.trained.lmer.balanced)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE
## [1] 0.8378554
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011).
Bf(sd=lex_noskew_VERSUS_lex_skew_child_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_child_trained_balanced_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1607545
##
## $Likelihoodnull
## [1] 0.0007690216
##
## $BayesFactor
## [1] 209.0377
exp1.child.me = subset(alldata, agegroup == "child" & old_me == "m.e.")
In each condition, the number of possible trials contributing to the analysis is 15 children * 2 nouns * 4 productions per noun making 120 trials. The proportion of missing trials in each condition is therefore:
1-(round(table(exp1.child.me$condition)/c(15*2*4,15*2*4,15*2*4,15*2*4),2))
##
## gen_noskew lex_noskew lex_skew mixed
## 0.05 0.05 0.06 0.05
kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
gen_noskew | 15 | 0.58 | 0.27 | 0.07 | 0.15 |
lex_noskew | 15 | 0.64 | 0.30 | 0.08 | 0.17 |
lex_skew | 15 | 0.86 | 0.22 | 0.06 | 0.12 |
mixed | 15 | 0.58 | 0.17 | 0.04 | 0.10 |
Separate out the nouns with the majority/ minority particle in the lexicalist-skewed condition.
kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.child.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det | N | correct | sd | se | ci |
---|---|---|---|---|---|
0 | 15 | 0.77 | 0.42 | 0.11 | 0.23 |
1 | 15 | 0.92 | 0.22 | 0.06 | 0.12 |
Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts4 helper function, and center the control variable using the lizCenter function.
exp1.child.me = lizContrasts4(exp1.child.me, exp1.child.me$condition, "lex_noskew")
exp1.child.me = lizCenter(exp1.child.me, list("correct_det"))
run the lmer model:
child.me.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)
kable(summary(child.me.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 1.716 | 0.382 | 4.490 | 0.000 |
lex_noskew_VERSUS_mixed | -0.499 | 0.870 | -0.573 | 0.566 |
lex_noskew_VERSUS_lex_skew | 2.792 | 0.978 | 2.856 | 0.004 |
lex_noskew_VERSUS_gen_noskew | -0.687 | 0.868 | -0.792 | 0.428 |
correct_det.ct | -0.479 | 0.865 | -0.554 | 0.580 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.836 | 2.091 | 0.400 | 0.689 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | -0.033 | 2.266 | -0.015 | 0.988 |
lex_noskew_VERSUS_gen_noskew:correct_det.ct | -0.372 | 2.095 | -0.177 | 0.859 |
For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:
# Absolute estimate and standard error for each of the three contrasts, looked
# up by row (model term) and column name rather than by position in the
# coefficient matrix, so the values stay correct if model terms are added or
# reordered.
lex_noskew_VERSUS_mixed_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Estimate"])
lex_noskew_VERSUS_mixed_child_me_MEANDIFF
## [1] 0.4990376
lex_noskew_VERSUS_mixed_child_me_SE = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_mixed", "Std. Error"])
lex_noskew_VERSUS_mixed_child_me_SE
## [1] 0.8704393
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Estimate"])
lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF
## [1] 2.791975
lex_noskew_VERSUS_lex_skew_child_me_SE = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_lex_skew", "Std. Error"])
lex_noskew_VERSUS_lex_skew_child_me_SE
## [1] 0.9777031
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_gen_noskew", "Estimate"])
lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF
## [1] 0.687495
lex_noskew_VERSUS_gen_noskew_child_me_SE = abs(summary(child.me.lmer)$coefficients["lex_noskew_VERSUS_gen_noskew", "Std. Error"])
lex_noskew_VERSUS_gen_noskew_child_me_SE
## [1] 0.8682298
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011).
Bf(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=lex_noskew_VERSUS_mixed_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1931586
##
## $Likelihoodnull
## [1] 0.3888633
##
## $BayesFactor
## [1] 0.4967261
Bf(sd=lex_noskew_VERSUS_lex_skew_child_me_SE, obtained=lex_noskew_VERSUS_lex_skew_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1723634
##
## $Likelihoodnull
## [1] 0.006917174
##
## $BayesFactor
## [1] 24.91818
Bf(sd=lex_noskew_VERSUS_gen_noskew_child_me_SE, obtained=lex_noskew_VERSUS_gen_noskew_child_me_MEANDIFF, uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2085792
##
## $Likelihoodnull
## [1] 0.335833
##
## $BayesFactor
## [1] 0.6210801
We found no substantial preference for either H1 over H0, or H0 over H1. We now consider how many more participants we might need in order to have evidence in each direction.
First, assume that the ACTUAL mean difference is ZERO: with this amount of variance, approximately how many participants would we need to have substantial evidence for the NULL (given that standard error scales with number of participants)?
x=Bf_powercalc(sd=lex_noskew_VERSUS_mixed_child_me_SE , obtained=0 , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600)
plot(x)
abline(h=(1/3))
x[12:14,]
## x y
## [1,] 21 0.3549773
## [2,] 22 0.3478424
## [3,] 23 0.3411234
This suggests we need an N of 22 (note that this is LESS than the current N of 30).
Second, assume the ACTUAL mean difference is the current estimate: with this amount of variance, approx how many participants would we need to have substantial evidence for H1 (given that standard error scales with number of participants)?
x=Bf_powercalc(sd=lex_noskew_VERSUS_mixed_child_me_SE, obtained=lex_noskew_VERSUS_mixed_child_me_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1,N=30, min=10, max=600)
plot(x)
abline(h=(3))
x[511:513,]
## x y
## [1,] 520 2.547936
## [2,] 521 2.559616
## [3,] 522 2.571353
This suggests we need an N of 521 (i.e., we would need 261 children per condition).
Select only data from the skewed condition and center (i) the variable of interest (majority_det - which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.
exp1.child.me.skew = subset(exp1.child.me, condition == "lex_skew")
exp1.child.me.skew= lizCenter(exp1.child.me.skew, list("correct_det", "majority_det"))
Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants. The model with the interaction with the control variable and full random slopes structure didn’t converge and was systematically simplified.
Non-converging models:
# child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)
# child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct + correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)
Converging model:
child.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.skew)
kable(summary(child.me.skew.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 9.636 | 2.443 | 3.944 | 0.000 |
majority_det.ct | -2.186 | 4.715 | -0.464 | 0.643 |
correct_det.ct | -0.712 | 3.290 | -0.216 | 0.829 |
majority_det.ct:correct_det.ct | 1.650 | 6.359 | 0.259 | 0.795 |
We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:
majority_VERSUS_minority_child_me_MEANDIFF = abs(summary(child.me.skew.lmer)$coefficients[2])
majority_VERSUS_minority_child_me_MEANDIFF
## [1] 2.185645
majority_VERSUS_minority_child_me_SE = abs(summary(child.me.skew.lmer)$coefficients[6])
majority_VERSUS_minority_child_me_SE
## [1] 4.715499
We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott, 2011).
Bf(sd=majority_VERSUS_minority_child_me_SE , obtained=majority_VERSUS_minority_child_me_MEANDIFF , uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.0808302
##
## $Likelihoodnull
## [1] 0.07598568
##
## $BayesFactor
## [1] 1.063756
Trained nouns (aware = 1, unaware = 0)
kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition | lexicallyaware | N | correct | sd | se | ci |
---|---|---|---|---|---|---|
lex_noskew | 0 | 12 | 0.73 | 0.16 | 0.04 | 0.10 |
lex_noskew | 1 | 3 | 0.75 | 0.22 | 0.12 | 0.54 |
lex_skew | 0 | 7 | 0.99 | 0.03 | 0.01 | 0.02 |
lex_skew | 1 | 8 | 0.94 | 0.07 | 0.03 | 0.06 |
mixed | 0 | 11 | 0.73 | 0.28 | 0.08 | 0.19 |
mixed | 1 | 4 | 0.63 | 0.20 | 0.10 | 0.32 |
Minimal Exposure Nouns
kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.child.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition | lexicallyaware | N | correct | sd | se | ci |
---|---|---|---|---|---|---|
gen_noskew | 0 | 11 | 0.62 | 0.25 | 0.07 | 0.17 |
gen_noskew | 1 | 4 | 0.46 | 0.34 | 0.17 | 0.55 |
lex_noskew | 0 | 12 | 0.63 | 0.29 | 0.09 | 0.19 |
lex_noskew | 1 | 3 | 0.67 | 0.38 | 0.22 | 0.95 |
lex_skew | 0 | 7 | 0.82 | 0.24 | 0.09 | 0.22 |
lex_skew | 1 | 8 | 0.89 | 0.21 | 0.07 | 0.17 |
mixed | 0 | 11 | 0.61 | 0.19 | 0.06 | 0.13 |
mixed | 1 | 4 | 0.48 | 0.04 | 0.02 | 0.06 |
exp1.adult.trained = subset(alldata, agegroup == "adult" & old_me == "old")
For the lexicalist-skewed and lexicalist-unskewed conditions, the number of possible trials contributing to the analysis is 15 adults * 4 nouns * 4 productions per noun making 240 trials; for the mixed condition it’s 15 adults * 2 nouns * 4 productions per noun making 120 trials. There was no generalist condition for adults. The proportion of missing trials in each condition is therefore:
1-(round(table(exp1.adult.trained$condition)/c(0,15*4*4,15*4*4,15*2*4),2))
##
## gen_noskew lex_noskew lex_skew mixed
## 0 0 0
kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
lex_noskew | 15 | 0.78 | 0.24 | 0.06 | 0.13 |
lex_skew | 15 | 0.93 | 0.14 | 0.04 | 0.08 |
mixed | 15 | 0.61 | 0.17 | 0.04 | 0.09 |
Separate out the nouns with the majority/ minority particle in the lexicalist-skewed condition.
kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.trained, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det | N | correct | sd | se | ci |
---|---|---|---|---|---|
0 | 15 | 0.88 | 0.27 | 0.07 | 0.15 |
1 | 15 | 0.94 | 0.12 | 0.03 | 0.06 |
Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.
exp1.adult.trained = lizContrasts(exp1.adult.trained, exp1.adult.trained$condition, "lex_noskew")
exp1.adult.trained = lizCenter(exp1.adult.trained, list("correct_det"))
Run the lmer model:
adult.trained.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained)
kable(summary(adult.trained.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 1.996 | 0.304 | 6.557 | 0.000 |
lex_noskew_VERSUS_mixed | -1.296 | 0.617 | -2.099 | 0.036 |
lex_noskew_VERSUS_lex_skew | 1.759 | 0.691 | 2.544 | 0.011 |
correct_det.ct | -0.220 | 0.406 | -0.541 | 0.589 |
lex_noskew_VERSUS_mixed:correct_det.ct | -0.447 | 0.625 | -0.715 | 0.475 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | -0.948 | 0.787 | -1.205 | 0.228 |
For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:
lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF
## [1] 1.295501
lex_noskew_VERSUS_mixed_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_adult_trained_SE
## [1] 0.617079
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF = abs(summary(adult.trained.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF
## [1] 1.759347
lex_noskew_VERSUS_lex_skew_adult_trained_SE = abs(summary(adult.trained.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_trained_SE
## [1] 0.691434
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)
Bf(sd=lex_noskew_VERSUS_mixed_adult_trained_SE , obtained=lex_noskew_VERSUS_mixed_adult_trained_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2491817
##
## $Likelihoodnull
## [1] 0.07136568
##
## $BayesFactor
## [1] 3.491618
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2302123
##
## $Likelihoodnull
## [1] 0.02265973
##
## $BayesFactor
## [1] 10.15953
Select only data from the skewed condition and center (i) the variable of interest (majority_det - which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable.
exp1.adult.trained.skew = subset(exp1.adult.trained, condition == "lex_skew")
exp1.adult.trained.skew= lizCenter(exp1.adult.trained.skew, list("correct_det", "majority_det"))
Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.
adult.trained.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.skew)
kable(summary(adult.trained.skew.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 6.266 | 0.003 | 2063.412 | 0 |
majority_det.ct | 2.492 | 0.003 | 820.722 | 0 |
correct_det.ct | 4.295 | 0.003 | 1414.478 | 0 |
majority_det.ct:correct_det.ct | 4.042 | 0.003 | 1331.179 | 0 |
We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:
majority_VERSUS_minority_adult_trained_MEANDIFF = abs(summary(adult.trained.skew.lmer)$coefficients[2])
majority_VERSUS_minority_adult_trained_MEANDIFF
## [1] 2.492241
majority_VERSUS_minority_adult_trained_SE = abs(summary(adult.trained.skew.lmer)$coefficients[6])
majority_VERSUS_minority_adult_trained_SE
## [1] 0.003036643
We now use these values in the Bf calculator along with the predicted difference of 2.161343 (obtained from equivalent analyses over data from Wonnacott, 2011)
Bf(sd=majority_VERSUS_minority_adult_trained_SE , obtained=majority_VERSUS_minority_adult_trained_MEANDIFF , uniform = 0, sdtheory = 2.161343, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1314065
##
## $Likelihoodnull
## [1] 0
##
## $BayesFactor
## [1] Inf
Remove two of the three majority particle nouns (specifically, those coded as trainednoun3 and trainednoun4) from the lex_skew condition. This will leave just two nouns - coded trainednoun1 and trainednoun2 - which go with the minority particle and majority particle respectively, so that majority and minority particle nouns contribute equally to the input.
# Flag (remove = 1) the lex_skew rows for trainednoun3 and trainednoun4, leave
# every other row at 0, then keep only the unflagged rows as the balanced set.
exp1.adult.trained$remove <- 0
exp1.adult.trained$remove[which(
  exp1.adult.trained$condition == "lex_skew" &
    exp1.adult.trained$noun %in% c("trainednoun3", "trainednoun4")
)] <- 1
exp1.adult.trained.balanced <- exp1.adult.trained[exp1.adult.trained$remove == 0, ]
kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = subset(exp1.adult.trained.balanced, condition=="lex_skew")), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
lex_skew | 15 | 0.94 | 0.13 | 0.03 | 0.07 |
Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function
exp1.adult.trained.balanced = lizContrasts(exp1.adult.trained.balanced, exp1.adult.trained.balanced$condition, "lex_noskew")
exp1.adult.trained.balanced = lizCenter(exp1.adult.trained.balanced, list("correct_det"))
Run the lmer model:
adult.trained.lmer.balanced = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.trained.balanced)
kable(summary(adult.trained.lmer.balanced)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.086 | 0.331 | 6.294 | 0.000 |
lex_noskew_VERSUS_mixed | -1.241 | 0.565 | -2.196 | 0.028 |
lex_noskew_VERSUS_lex_skew | 2.149 | 0.808 | 2.660 | 0.008 |
correct_det.ct | -0.665 | 0.529 | -1.258 | 0.209 |
lex_noskew_VERSUS_mixed:correct_det.ct | -0.322 | 0.660 | -0.487 | 0.626 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | -1.955 | 1.262 | -1.549 | 0.121 |
For the contrast between lex_skew and lex_noskew, we require an estimate of the mean difference between the conditions and of the corresponding standard error and this is taken from the model above:
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF = abs(summary(adult.trained.lmer.balanced)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF
## [1] 2.148546
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE = abs(summary(adult.trained.lmer.balanced)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE
## [1] 0.8076634
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_trained_balanced_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.2088631
##
## $Likelihoodnull
## [1] 0.01435497
##
## $BayesFactor
## [1] 14.54988
exp1.adult.me = subset(alldata, agegroup == "adult" & old_me == "m.e.")
In each condition, the number of possible trials contributing to the analysis is 15 adults * 2 nouns * 4 productions per noun making 120 trials. There is no generalist condition for adults. The proportion of missing trials in each condition is therefore:
1-(round(table(exp1.adult.me$condition)/c(0,15*2*4,15*2*4,15*2*4),2))
##
## gen_noskew lex_noskew lex_skew mixed
## 0.00 0.00 0.01
kable(summarySE(aggregate(correct ~ participantid + condition , FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = "condition"), digits = 2)
condition | N | correct | sd | se | ci |
---|---|---|---|---|---|
lex_noskew | 15 | 0.89 | 0.27 | 0.07 | 0.15 |
lex_skew | 15 | 0.97 | 0.13 | 0.03 | 0.07 |
mixed | 15 | 0.84 | 0.26 | 0.07 | 0.14 |
Separate out the nouns with the majority/ minority particle in the lexicalist-skewed condition.
kable(summarySE(aggregate(correct ~ participantid + majority_det, FUN = mean, data = subset(exp1.adult.me, condition == "lex_skew")), measurevar="correct", groupvars = "majority_det"), digits = 2)
majority_det | N | correct | sd | se | ci |
---|---|---|---|---|---|
0 | 15 | 1.00 | 0.00 | 0.00 | 0.00 |
1 | 15 | 0.93 | 0.26 | 0.07 | 0.14 |
Set up the data frame so that we can look at the contrasts between the lex_noskew condition and each of the other conditions using the lizContrasts helper function, and center the control variable using the lizCenter function.
exp1.adult.me = lizContrasts(exp1.adult.me, exp1.adult.me$condition, "lex_noskew")
exp1.adult.me = lizCenter(exp1.adult.me, list("correct_det"))
Run the lmer model:
adult.me.lmer = glmer(correct ~ (lex_noskew_VERSUS_mixed + lex_noskew_VERSUS_lex_skew) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me)
kable(summary(adult.me.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 12.143 | 9.543 | 1.273 | 0.203 |
lex_noskew_VERSUS_mixed | -0.704 | 1.990 | -0.354 | 0.724 |
lex_noskew_VERSUS_lex_skew | 9.727 | 28.296 | 0.344 | 0.731 |
correct_det.ct | 6.150 | 19.085 | 0.322 | 0.747 |
lex_noskew_VERSUS_mixed:correct_det.ct | -0.150 | 3.523 | -0.043 | 0.966 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 18.082 | 56.582 | 0.320 | 0.749 |
For each contrast, we require an estimate of the mean difference between the conditions and of the corresponding standard error. These are taken from the model described above:
lex_noskew_VERSUS_mixed_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients[2])
lex_noskew_VERSUS_mixed_adult_me_MEANDIFF
## [1] 0.7039999
lex_noskew_VERSUS_mixed_adult_me_SE = abs(summary(adult.me.lmer)$coefficients[8])
lex_noskew_VERSUS_mixed_adult_me_SE
## [1] 1.990372
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF = abs(summary(adult.me.lmer)$coefficients[3])
lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF
## [1] 9.727041
lex_noskew_VERSUS_lex_skew_adult_me_SE = abs(summary(adult.me.lmer)$coefficients[9])
lex_noskew_VERSUS_lex_skew_adult_me_SE
## [1] 28.29604
We now use these values in the Bf calculator along with the predicted difference of 2.757657 (obtained from equivalent analyses over data from Wonnacott, 2011)
Bf(sd=lex_noskew_VERSUS_mixed_adult_me_SE , obtained=lex_noskew_VERSUS_mixed_adult_me_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.1411117
##
## $Likelihoodnull
## [1] 0.1882823
##
## $BayesFactor
## [1] 0.7494684
Bf(sd=lex_noskew_VERSUS_lex_skew_adult_me_SE, obtained=lex_noskew_VERSUS_lex_skew_adult_me_MEANDIFF , uniform = 0, sdtheory = 2.757657, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.01361319
##
## $Likelihoodnull
## [1] 0.01328997
##
## $BayesFactor
## [1] 1.024321
Select the subset of data from the skewed language and center (i) the variable of interest (majority_det - which codes for whether this is a noun where the more frequent particle is the correct one) and (ii) the control variable
exp1.adult.me.skew = subset(exp1.adult.me, condition == "lex_skew")
exp1.adult.me.skew = lizCenter(exp1.adult.me.skew, list("correct_det", "majority_det"))
Run an lme to see if participants are more accurate with the majority particle. Note that majority_det is within participants.
adult.me.skew.lmer = glmer(correct ~ (majority_det.ct * correct_det.ct) + (majority_det.ct * correct_det.ct|participantid), control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.adult.me.skew)
kable(summary(adult.me.skew.lmer)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 24.977 | 11.937 | 2.092 | 0.036 |
majority_det.ct | -8.643 | 20.176 | -0.428 | 0.668 |
correct_det.ct | 5.771 | 19.698 | 0.293 | 0.770 |
majority_det.ct:correct_det.ct | 11.895 | 20.724 | 0.574 | 0.566 |
We require an estimate of the mean difference between the conditions and corresponding standard error. These are taken from the model above:
majority_VERSUS_minority_adult_me_MEANDIFF = abs(summary(adult.me.skew.lmer)$coefficients[2])
majority_VERSUS_minority_adult_me_MEANDIFF
## [1] 8.64322
majority_VERSUS_minority_adult_me_SE = abs(summary(adult.me.skew.lmer)$coefficients[6])
majority_VERSUS_minority_adult_me_SE
## [1] 20.17558
We now use these values in the Bf calculator along with the predicted difference of 1.261336 (obtained from equivalent analyses over data from Wonnacott, 2011)
Bf(sd=majority_VERSUS_minority_adult_me_SE , obtained=majority_VERSUS_minority_adult_me_MEANDIFF , uniform = 0, sdtheory = 1.261336, meanoftheory = 0, tail=1)
## $LikelihoodTheory
## [1] 0.01835918
##
## $Likelihoodnull
## [1] 0.0180398
##
## $BayesFactor
## [1] 1.017704
Trained Nouns (aware = 1, unaware = 0)
kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.trained), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition | lexicallyaware | N | correct | sd | se | ci |
---|---|---|---|---|---|---|
lex_noskew | 1 | 15 | 0.78 | 0.24 | 0.06 | 0.13 |
lex_skew | 0 | 1 | 1.00 | NA | NA | NA |
lex_skew | 1 | 14 | 0.92 | 0.14 | 0.04 | 0.08 |
mixed | 0 | 6 | 0.60 | 0.18 | 0.08 | 0.19 |
mixed | 1 | 9 | 0.61 | 0.17 | 0.06 | 0.13 |
Minimal Exposure Nouns
kable(summarySE(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me), measurevar="correct", groupvars = c("condition", "lexicallyaware")), digits = 2)
condition | lexicallyaware | N | correct | sd | se | ci |
---|---|---|---|---|---|---|
lex_noskew | 1 | 15 | 0.89 | 0.27 | 0.07 | 0.15 |
lex_skew | 0 | 1 | 1.00 | NA | NA | NA |
lex_skew | 1 | 14 | 0.96 | 0.13 | 0.04 | 0.08 |
mixed | 0 | 6 | 0.81 | 0.35 | 0.14 | 0.37 |
mixed | 1 | 9 | 0.86 | 0.19 | 0.06 | 0.15 |
Compare the extent of reported awareness of lexical patterns in the different conditions using Fisher's exact test.
awaretable = with(aggregate(correct ~ participantid + condition + lexicallyaware, FUN = mean, data = exp1.adult.me),
table(condition, lexicallyaware))
kable(awaretable)
0 | 1 | |
---|---|---|
gen_noskew | 0 | 0 |
lex_noskew | 0 | 15 |
lex_skew | 1 | 14 |
mixed | 6 | 9 |
# mixed versus lexicalist-unskewed
fisher.test(rbind(awaretable[2,], awaretable[4,]))
##
## Fisher's Exact Test for Count Data
##
## data: rbind(awaretable[2, ], awaretable[4, ])
## p-value = 0.01686
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.0000000 0.6740343
## sample estimates:
## odds ratio
## 0
# mixed versus lexicalist-skewed
fisher.test(rbind(awaretable[3,], awaretable[4,]))
##
## Fisher's Exact Test for Count Data
##
## data: rbind(awaretable[3, ], awaretable[4, ])
## p-value = 0.08008
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.002183889 1.190995284
## sample estimates:
## odds ratio
## 0.1151766
An anonymous reviewer pointed out that although differences between the groups are not statistically significant, the children in the lexicalist-skewed condition are somewhat older and have a higher listening span than children in the other conditions.
To explore this confound:
First, we looked for the following correlations:
(Note that we use non-parametric statistics given that our data are underlyingly binomial - however results with Pearson are equivalent)
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.trained, FUN=mean), Kendall(correct, age_months))
## tau = 0.0938, 2-sided pvalue =0.40557
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.trained, FUN=mean), Kendall(correct, listeningspanRAW))
## tau = 0.0655, 2-sided pvalue =0.57197
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.me, FUN=mean), Kendall(correct, age_months))
## tau = 0.088, 2-sided pvalue =0.37565
with(aggregate(cbind(age_months,correct,listeningspanRAW) ~ participantid, exp1.child.me, FUN=mean), Kendall(correct, listeningspanRAW))
## tau = 0.0914, 2-sided pvalue =0.36537
There are no significant correlations.
Second, we re-ran the models for children above such that they include each of age and listening span
exp1.child.trained = lizCenter(exp1.child.trained, list("age_months", "listeningspanRAW"))
child.trained.lmer.age = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + age_months.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)
kable(summary(child.trained.lmer.age)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.419 | 0.329 | 7.348 | 0.000 |
lex_noskew_VERSUS_mixed | -0.020 | 0.586 | -0.035 | 0.972 |
lex_noskew_VERSUS_lex_skew | 3.101 | 0.742 | 4.179 | 0.000 |
correct_det.ct | -0.348 | 0.705 | -0.494 | 0.622 |
age_months.ct | 0.019 | 0.044 | 0.432 | 0.666 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.134 | 1.323 | 0.101 | 0.919 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 0.713 | 1.599 | 0.446 | 0.655 |
child.trained.lmer.listspan = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + listeningspanRAW.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained)
kable(summary(child.trained.lmer.listspan)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.412 | 0.327 | 7.367 | 0.000 |
lex_noskew_VERSUS_mixed | -0.168 | 0.573 | -0.293 | 0.770 |
lex_noskew_VERSUS_lex_skew | 3.295 | 0.750 | 4.395 | 0.000 |
correct_det.ct | -0.283 | 0.717 | -0.394 | 0.693 |
listeningspanRAW.ct | -0.173 | 0.089 | -1.948 | 0.051 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.116 | 1.378 | 0.084 | 0.933 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 0.878 | 1.638 | 0.536 | 0.592 |
exp1.child.me = lizCenter(exp1.child.me, list("age_months", "listeningspanRAW"))
child.me.lmer.age = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + age_months.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)
kable(summary(child.me.lmer.age)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 1.719 | 0.381 | 4.507 | 0.000 |
lex_noskew_VERSUS_mixed | -0.458 | 0.876 | -0.523 | 0.601 |
lex_noskew_VERSUS_lex_skew | 2.775 | 0.975 | 2.846 | 0.004 |
lex_noskew_VERSUS_gen_noskew | -0.683 | 0.866 | -0.789 | 0.430 |
correct_det.ct | -0.494 | 0.864 | -0.573 | 0.567 |
age_months.ct | 0.023 | 0.068 | 0.346 | 0.730 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.870 | 2.089 | 0.416 | 0.677 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | -0.066 | 2.263 | -0.029 | 0.977 |
lex_noskew_VERSUS_gen_noskew:correct_det.ct | -0.357 | 2.091 | -0.171 | 0.865 |
# Listening-span model for minimal exposure nouns. Stored under its own name
# (matching child.trained.lmer.listspan) so it does not overwrite the age model
# child.me.lmer.age fitted above.
child.me.lmer.listspan = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + listeningspanRAW.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me)
kable(summary(child.me.lmer.listspan)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 1.718 | 0.385 | 4.467 | 0.000 |
lex_noskew_VERSUS_mixed | -0.540 | 0.879 | -0.615 | 0.539 |
lex_noskew_VERSUS_lex_skew | 2.891 | 1.002 | 2.885 | 0.004 |
lex_noskew_VERSUS_gen_noskew | -0.639 | 0.875 | -0.730 | 0.465 |
correct_det.ct | -0.376 | 0.886 | -0.425 | 0.671 |
listeningspanRAW.ct | -0.077 | 0.123 | -0.625 | 0.532 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.787 | 2.115 | 0.372 | 0.710 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 0.127 | 2.301 | 0.055 | 0.956 |
lex_noskew_VERSUS_gen_noskew:correct_det.ct | -0.422 | 2.117 | -0.199 | 0.842 |
In line with the correlations, age and listening span are not significant predictors for either input nouns or minimal exposure nouns. Critically, we see the same pattern of results for key experimental effects (i.e., a reliable contrast between the lexicalist-skewed and lexicalist-unskewed conditions, but no other reliable differences).
As a final check, we re-ran the models having removed the three oldest children from the lexicalist-skewed condition (making the lexicalist-skewed and lexicalist-unskewed means near identical for age).
aggregated.child.age.listsp = aggregate(cbind(age_months, listeningspanRAW, listeningspan) ~ participantid + condition, subset(alldata, agegroup == "child"), FUN=mean)
aggregated.child.age.listsp
## participantid condition age_months listeningspanRAW
## 1 gen_child_1 gen_noskew 80 6
## 2 gen_child_10 gen_noskew 81 13
## 3 gen_child_11 gen_noskew 72 7
## 4 gen_child_12 gen_noskew 77 1
## 5 gen_child_13 gen_noskew 70 7
## 6 gen_child_14 gen_noskew 74 7
## 7 gen_child_15 gen_noskew 72 3
## 8 gen_child_2 gen_noskew 72 4
## 9 gen_child_3 gen_noskew 74 1
## 10 gen_child_4 gen_noskew 70 11
## 11 gen_child_5 gen_noskew 78 14
## 12 gen_child_6 gen_noskew 70 6
## 13 gen_child_7 gen_noskew 73 7
## 14 gen_child_8 gen_noskew 70 6
## 15 gen_child_9 gen_noskew 81 7
## 16 lex_noskew_child_1 lex_noskew 70 7
## 17 lex_noskew_child_10 lex_noskew 79 5
## 18 lex_noskew_child_11 lex_noskew 69 12
## 19 lex_noskew_child_12 lex_noskew 68 5
## 20 lex_noskew_child_13 lex_noskew 70 10
## 21 lex_noskew_child_14 lex_noskew 71 6
## 22 lex_noskew_child_15 lex_noskew 73 1
## 23 lex_noskew_child_2 lex_noskew 79 4
## 24 lex_noskew_child_3 lex_noskew 74 1
## 25 lex_noskew_child_4 lex_noskew 72 6
## 26 lex_noskew_child_5 lex_noskew 80 5
## 27 lex_noskew_child_6 lex_noskew 81 10
## 28 lex_noskew_child_7 lex_noskew 64 7
## 29 lex_noskew_child_8 lex_noskew 77 5
## 30 lex_noskew_child_9 lex_noskew 77 8
## 31 lex_skew_child_1 lex_skew 79 6
## 32 lex_skew_child_10 lex_skew 80 7
## 33 lex_skew_child_11 lex_skew 75 9
## 34 lex_skew_child_12 lex_skew 77 8
## 35 lex_skew_child_13 lex_skew 69 7
## 36 lex_skew_child_14 lex_skew 72 7
## 37 lex_skew_child_15 lex_skew 70 7
## 38 lex_skew_child_2 lex_skew 72 10
## 39 lex_skew_child_3 lex_skew 78 7
## 40 lex_skew_child_4 lex_skew 70 6
## 41 lex_skew_child_5 lex_skew 72 4
## 42 lex_skew_child_6 lex_skew 78 8
## 43 lex_skew_child_7 lex_skew 80 7
## 44 lex_skew_child_8 lex_skew 81 8
## 45 lex_skew_child_9 lex_skew 73 7
## 46 mixed_child_1 mixed 62 5
## 47 mixed_child_10 mixed 69 10
## 48 mixed_child_11 mixed 72 0
## 49 mixed_child_12 mixed 72 3
## 50 mixed_child_13 mixed 67 4
## 51 mixed_child_14 mixed 72 6
## 52 mixed_child_15 mixed 64 3
## 53 mixed_child_2 mixed 79 8
## 54 mixed_child_3 mixed 77 6
## 55 mixed_child_4 mixed 74 5
## 56 mixed_child_5 mixed 72 6
## 57 mixed_child_6 mixed 78 5
## 58 mixed_child_7 mixed 81 6
## 59 mixed_child_8 mixed 79 8
## 60 mixed_child_9 mixed 69 6
## listeningspan
## 1 99
## 2 133
## 3 104
## 4 75
## 5 125
## 6 104
## 7 85
## 8 89
## 9 75
## 10 136
## 11 138
## 12 119
## 13 104
## 14 119
## 15 104
## 16 125
## 17 94
## 18 136
## 19 113
## 20 136
## 21 119
## 22 75
## 23 89
## 24 75
## 25 99
## 26 94
## 27 119
## 28 125
## 29 94
## 30 109
## 31 99
## 32 104
## 33 114
## 34 109
## 35 125
## 36 104
## 37 125
## 38 119
## 39 104
## 40 119
## 41 89
## 42 109
## 43 101
## 44 109
## 45 104
## 46 113
## 47 136
## 48 70
## 49 85
## 50 108
## 51 99
## 52 102
## 53 109
## 54 99
## 55 94
## 56 99
## 57 94
## 58 99
## 59 109
## 60 119
# Drop the three oldest children in the lexicalist-skewed condition, identified
# from the data rather than by hard-coded ID. The IDs hard-coded previously
# (lex_skew_child_2, _6 and _3) are aged 72, 78 and 78 months in the table
# printed above, whereas the three oldest are lex_skew_child_8 (81 months) and
# lex_skew_child_7 / lex_skew_child_10 (80 months each).
# NOTE(review): the rendered outputs that follow were produced with the old IDs
# and need re-knitting. Ties at the third position would be broken arbitrarily.
lex_skew.child.ages = aggregate(age_months ~ participantid, subset(exp1.child.trained, condition == "lex_skew"), FUN = mean)
oldest.lex_skew.children = as.character(head(lex_skew.child.ages$participantid[order(lex_skew.child.ages$age_months, decreasing = TRUE)], 3))
exp1.child.trained.agebalenced = subset(exp1.child.trained, !(participantid %in% oldest.lex_skew.children))
exp1.child.me.agebalenced = subset(exp1.child.me, !(participantid %in% oldest.lex_skew.children))
aggregate(age_months ~ condition, exp1.child.trained.agebalenced, FUN=mean)
## condition age_months
## 1 lex_noskew 73.77632
## 2 lex_skew 74.97312
## 3 mixed 72.43220
child.trained.lmer.agebal = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.trained.agebalenced)
kable(summary(child.trained.lmer.agebal)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.388 | 0.345 | 6.927 | 0.000 |
lex_noskew_VERSUS_mixed | -0.045 | 0.594 | -0.077 | 0.939 |
lex_noskew_VERSUS_lex_skew | 3.034 | 0.794 | 3.824 | 0.000 |
correct_det.ct | -0.097 | 0.737 | -0.131 | 0.895 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.143 | 1.346 | 0.106 | 0.915 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | 1.344 | 1.704 | 0.789 | 0.430 |
child.me.lmer.agebal = glmer(correct ~ (lex_noskew_VERSUS_mixed+ lex_noskew_VERSUS_lex_skew + lex_noskew_VERSUS_gen_noskew ) * correct_det.ct + (correct_det.ct|participantid),control=glmerControl(optimizer = "bobyqa"), family = binomial, data = exp1.child.me.agebalenced)
kable(summary(child.me.lmer.agebal)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 1.794 | 0.388 | 4.630 | 0.000 |
lex_noskew_VERSUS_mixed | -0.462 | 0.833 | -0.555 | 0.579 |
lex_noskew_VERSUS_lex_skew | 3.164 | 1.076 | 2.941 | 0.003 |
lex_noskew_VERSUS_gen_noskew | -0.678 | 0.828 | -0.819 | 0.413 |
correct_det.ct | -0.603 | 0.917 | -0.658 | 0.511 |
lex_noskew_VERSUS_mixed:correct_det.ct | 0.837 | 2.177 | 0.384 | 0.701 |
lex_noskew_VERSUS_lex_skew:correct_det.ct | -0.486 | 2.526 | -0.192 | 0.847 |
lex_noskew_VERSUS_gen_noskew:correct_det.ct | -0.396 | 2.177 | -0.182 | 0.856 |
The contrast between lexicalist-skewed and lexicalist-unskewed remains reliable.
An anonymous reviewer suggested that it would be interesting to look at the error trials given that Hudson Kam and Newport (2009) found different error patterns depending on whether the participant had been exposed to a lexicalist language or a generalist language (that also had skew). Specifically, participants exposed to the lexicalist language were more likely than those exposed to the generalist language to produce no particle. This may suggest that they had learned something about the lexical nature of the language, but could not remember which particle was correct.
The table below shows the number of trials (out of all trials, after noun-incorrect trials have been removed) in which children produced errors (there was only 1 error for adults, and this was a “noun-incorrect” error, most likely reflecting a lapse in attention). The errors are split according to whether no particle was produced, or whether a particle was produced, but it was not identifiable as one of the target particles.
data_error = subset(data, agegroup == "child" & noun_correct == "1")
xtabs(~ old_me + det_used_coded + condition, data_error)
## , , condition = gen_noskew
##
## det_used_coded
## old_me det1 det2 none other
## m.e. 51 63 0 2
## old 89 133 1 12
##
## , , condition = lex_noskew
##
## det_used_coded
## old_me det1 det2 none other
## m.e. 62 52 1 0
## old 131 97 9 0
##
## , , condition = lex_skew
##
## det_used_coded
## old_me det1 det2 none other
## m.e. 51 62 0 3
## old 56 177 1 1
##
## , , condition = mixed
##
## det_used_coded
## old_me det1 det2 none other
## m.e. 58 56 1 4
## old 119 112 3 3
Numbers are too small to analyse (the 3.80% of missing particles shown for the lexicalist-unskewed condition comprise only 9 productions, 8 from one child, and one from another), although it appears that there is some evidence that for input nouns, children were more likely to leave off the particle in the lexicalist-unskewed condition. This possibly reflects the fact that children learning this language avoided producing an incorrect particle because they knew that the noun went with a particular particle, but were unsure which particle that was. This would suggest that they had learned something about the overall lexical nature of the input, but something which doesn’t show up in our current analyses.
However, if we also include data from the additional 23 children tested whose data were not included in the analyses (see footnote 2) we now see overall a greater proportion of “No particle produced” errors in the mixed condition (input nouns 5.06%, minimal exposure nouns 3.57%) than in the lexicalist unskewed condition (input nouns 2.52%, minimal exposure nouns 0.52%).
# Shannon entropy (in bits) of a discrete probability distribution.
#   pDistribution: numeric vector of outcome probabilities.
# Returns -sum(p * log2(p)) taken over the strictly positive entries, i.e.
# outcomes with probability 0 contribute nothing (the usual 0 * log2(0) = 0
# convention). For a two-outcome distribution this still gives 0 whenever one
# outcome has probability 0; for longer vectors a zero entry no longer forces
# the whole result to 0 (e.g. c(0.5, 0.5, 0) gives 1).
# Stops if the input contains NA/NaN, since the entropy is then undefined.
shannon.entropy <- function(pDistribution) {
  if (anyNA(pDistribution)) {
    stop("pDistribution contains NA/NaN values", call. = FALSE)
  }
  positive <- pDistribution[pDistribution > 0]
  -sum(log2(positive) * positive)
}
# Entropy (in bits) of a two-outcome situation.
#
# x: number of trials with outcome 1
# n: total number of trials (so n - x trials had outcome 2)
binom.entropy <- function(x, n) {
  p_outcome1 <- x / n
  p_outcome2 <- (n - x) / n
  shannon.entropy(c(p_outcome1, p_outcome2))
}
# Average binomial entropy across a set of items (here: nouns), each item
# weighted by its share of the total number of trials.
#
# df: data frame whose first column holds the count of outcome 1 and whose
#     second column holds the count of outcome 2, one row per item.
#
# Returns the weighted mean of the per-item binomial entropies. Items with no
# trials at all are ignored (they have no defined entropy and zero weight);
# if no item has any trials the result is NA.
average.binom.entropy <- function(df) {
  outcome1 <- df[[1]]
  trials <- df[[1]] + df[[2]]

  # which() also discards rows whose trial count is NA.
  observed <- which(trials > 0)
  if (length(observed) == 0) {
    return(NA_real_)
  }

  entropies <- vapply(
    observed,
    function(i) binom.entropy(outcome1[i], trials[i]),
    numeric(1)
  )
  weights <- trials[observed] / sum(trials[observed])
  sum(weights * entropies, na.rm = TRUE)
}
# Monte Carlo estimate of the entropy expected from n two-outcome trials when
# outcome 1 occurs with probability p1 (the analyses here use chance, p1 = 0.5).
#
# n:                 number of trials in each simulated experiment
# p1:                probability of outcome 1 on each trial
# veridical.entropy: the observed entropy to compare against
# trials:            number of simulated experiments
#
# Returns a list with
#   $mean - mean entropy over the simulated experiments
#   $p    - proportion of simulated experiments whose entropy is the same as
#           or lower than veridical.entropy (NA if veridical.entropy is NA)
total.entropy.monte <- function(n, p1, veridical.entropy, trials) {
  msample <- vapply(
    seq_len(trials),
    function(k) binom.entropy(rbinom(1, n, p1), n),
    numeric(1)
  )
  list(
    mean = mean(msample),
    p = sum(msample <= veridical.entropy) / trials
  )
}
# As total.entropy.monte, but for the average per-item entropy: each simulated
# experiment redraws the outcome-1 count of every item (noun) from a binomial
# with that item's observed number of trials, then computes the
# frequency-weighted average entropy across items.
#
# df:                data frame; column 1 = outcome 1 counts, column 2 =
#                    outcome 2 counts, one row per item
# p1:                probability of outcome 1 on each trial
# veridical.entropy: the observed average entropy to compare against
# trials:            number of simulated experiments
#
# Returns a list with
#   $mean - mean simulated average entropy
#   $p    - proportion of simulations whose average entropy is the same as or
#           lower than veridical.entropy
average.entropy.monte <- function(df, p1, veridical.entropy, trials) {
  # Trials per item stay fixed across simulations; only the split varies.
  item_totals <- df[[1]] + df[[2]]
  msample <- numeric(trials)
  for (i in seq_len(trials)) {
    sim_outcome1 <- rbinom(length(item_totals), item_totals, p1)
    sim_outcome2 <- item_totals - sim_outcome1
    msample[i] <- average.binom.entropy(data.frame(sim_outcome1, sim_outcome2))
  }
  list(
    mean = mean(msample),
    p = sum(msample <= veridical.entropy) / trials
  )
}
# Keep only the usable alternating-noun trials. A trial is kept when
#   - the noun is an alternating noun,
#   - the participant produced the correct noun,
#   - the participant did not substitute another word for the particle, and
#   - the participant did produce a particle.
alternating <- subset(
  data,
  noun_type == "alternating" &
    noun_correct == "1" &
    det_used_coded != "other" &
    det_used_coded != "none"
)
# Discard factor levels that no longer occur after filtering.
alternating <- droplevels(alternating)
Data for alternating nouns in the mixed and generalist-unskewed conditions were collected but not included in the main analyses. Note that there is no “correct” particle which should be used for these nouns. However, other research with artificial languages has suggested that children may avoid probabilistic alternation in their own productions, so-called “regularization” (e.g. Hudson-Kam & Newport, 2005, Samara, Smith, Brown, & Wonnacott, in submission, Cognitive Psychology).
Organize the data so that for each participant, we can view (i) how many times they produce each particle in total and (ii) how many times they produce each particle with each noun. (Note that in the mixed languages, data from trainednoun1 and trainednoun2 should be ignored: since these nouns are not actually alternating, they are shown here as having 0 total productions. Data from these nouns are ignored in the subsequent analyses.)
# Every remaining trial counts as one production.
alternating$count <- 1

# x: one row per participant with the total number of productions of each
# particle (wide format: columns det1 and det2).
x <- aggregate(
  count ~ participantid + det_used_coded + condition + agegroup,
  data = alternating,
  FUN = sum
)
x <- reshape(
  x,
  idvar = c("participantid", "condition", "agegroup"),
  timevar = "det_used_coded",
  direction = "wide"
)
x <- rename(x, c(count.det1 = "det1", count.det2 = "det2"))
# A particle the participant never produced comes out of reshape() as NA;
# it is really a count of zero.
x[, 4:5] <- replace(x[, 4:5], is.na(x[, 4:5]), 0)

# y: one row per participant with the number of productions of each particle
# for each noun (wide format: columns n1.det1 ... n4.det2).
y <- aggregate(
  count ~ participantid + det_used_coded + condition + agegroup + noun,
  data = alternating,
  FUN = sum
)
y <- reshape(
  y,
  idvar = c("participantid", "condition", "agegroup", "noun"),
  timevar = c("det_used_coded"),
  direction = "wide"
)
y <- reshape(
  y,
  idvar = c("participantid", "condition", "agegroup"),
  timevar = c("noun"),
  direction = "wide"
)
# Map the names generated by the two reshapes, count.det<D>.trainednoun<N>,
# onto the shorter n<N>.det<D>.
noun_index <- rep(1:4, each = 2)
det_index <- rep(1:2, times = 4)
noun_column_names <- setNames(
  paste0("n", noun_index, ".det", det_index),
  paste0("count.det", det_index, ".trainednoun", noun_index)
)
y <- rename(y, noun_column_names)
y[, 4:11] <- replace(y[, 4:11], is.na(y[, 4:11]), 0)

# d: per-participant totals alongside the per-noun counts.
d <- merge(x, y)
Calculate total entropy of each participant’s productions: One method by which participants might reduce the unpredictability of particle usage in their own productions is to boost the frequency of one of the two particles at the expense of the other. To capture this type of regularization, we computed the total entropy of each participant’s particle use, given by:
$$\text{Entropy} = -\sum_{i} P(i)\,\log_2 P(i)$$
where the sum is over the two possible particle choices, and P(i) is the frequency of particle i in a participant’s productions.
Entropy measures how variable a participant’s productions are: entropy of 0 corresponds to a participant who consistently uses a single particle, and entropy is at a maximum (entropy = 1) when both particles are used equiprobably (as in the input for alternating nouns).
Monte Carlo simulation is used to calculate what entropy would be expected if participants were to produce the particles at random (with p=0.5) (note that participants each have different numbers of nouns contributing to these analyses).
# Total entropy of each participant's particle use, together with a Monte
# Carlo estimate (1000 simulations at p = 0.5) of the entropy expected by
# chance for the same number of productions:
#   Entropy       - observed entropy of det1 vs. det2 productions
#   ChanceEntropy - mean entropy over the simulations
#   Probability   - proportion of simulations with entropy at or below the
#                   observed value
# A participant with no productions at all keeps NA in all three columns.
d$Entropy <- NA_real_
d$ChanceEntropy <- NA_real_
d$Probability <- NA_real_
for (i in seq_len(nrow(d))) {
  n_productions <- d$det1[i] + d$det2[i]
  if (n_productions > 0) {
    d$Entropy[i] <- binom.entropy(d$det1[i], n_productions)
    monte <- total.entropy.monte(n_productions, 0.5, d$Entropy[i], 1000)
    d$ChanceEntropy[i] <- monte$mean
    d$Probability[i] <- monte$p
  }
}
Calculate the conditional entropy of particle use given the noun being marked, which is simply the average entropy of particle use for each noun considered separately (where the average is weighted by noun frequency). This gives an indication of any regularization on a noun by noun basis.
# Conditional (lexical) entropy of each participant's particle use: the
# average per-noun entropy, weighted by how often each noun was produced,
# together with a Monte Carlo estimate (1000 simulations at p = 0.5) of the
# value expected by chance:
#   LexicalEntropy            - observed weighted average per-noun entropy
#   ChanceLexicalEntropy      - mean over the simulations
#   ProbabilityLexicalEntropy - proportion of simulations at or below the
#                               observed value
# Nouns for which the participant has no productions are left out. A
# participant with no per-noun productions keeps NA in all three columns.
noun_prefixes <- paste0("n", 1:4)
det1_cols <- paste0(noun_prefixes, ".det1")
det2_cols <- paste0(noun_prefixes, ".det2")

d$LexicalEntropy <- NA_real_
d$ChanceLexicalEntropy <- NA_real_
d$ProbabilityLexicalEntropy <- NA_real_
for (i in seq_len(nrow(d))) {
  det1_counts <- unlist(d[i, det1_cols], use.names = FALSE)
  det2_counts <- unlist(d[i, det2_cols], use.names = FALSE)
  produced <- (det1_counts + det2_counts) > 0
  noun_counts <- data.frame(
    x1 = det1_counts[produced],
    x2 = det2_counts[produced]
  )
  if (nrow(noun_counts) > 0) {
    d$LexicalEntropy[i] <- average.binom.entropy(noun_counts)
    monte <- average.entropy.monte(noun_counts, 0.5, d$LexicalEntropy[i], 1000)
    d$ChanceLexicalEntropy[i] <- monte$mean
    d$ProbabilityLexicalEntropy[i] <- monte$p
  }
}
kable(d, digits = 3)
participantid | condition | agegroup | det1 | det2 | n1.det1 | n1.det2 | n2.det1 | n2.det2 | n3.det1 | n3.det2 | n4.det1 | n4.det2 | Entropy | ChanceEntropy | Probability | LexicalEntropy | ChanceLexicalEntropy | ProbabilityLexicalEntropy |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
gen_child_1 | gen_noskew | child | 5 | 9 | 0 | 4 | 2 | 2 | 2 | 0 | 1 | 3 | 0.940 | 0.945 | 0.436 | 0.518 | 0.737 | 0.168 |
gen_child_10 | gen_noskew | child | 7 | 9 | 1 | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 0.989 | 0.952 | 0.812 | 0.953 | 0.784 | 0.984 |
gen_child_11 | gen_noskew | child | 7 | 8 | 4 | 0 | 0 | 3 | 3 | 1 | 0 | 4 | 0.997 | 0.947 | 1.000 | 0.216 | 0.766 | 0.006 |
gen_child_12 | gen_noskew | child | 0 | 9 | 0 | 2 | 0 | 2 | 0 | 3 | 0 | 2 | 0.000 | 0.909 | 0.004 | 0.000 | 0.562 | 0.030 |
gen_child_13 | gen_noskew | child | 8 | 7 | 3 | 0 | 3 | 1 | 0 | 4 | 2 | 2 | 0.997 | 0.955 | 1.000 | 0.483 | 0.762 | 0.099 |
gen_child_14 | gen_noskew | child | 8 | 8 | 3 | 1 | 1 | 3 | 4 | 0 | 0 | 4 | 1.000 | 0.954 | 1.000 | 0.406 | 0.783 | 0.034 |
gen_child_15 | gen_noskew | child | 8 | 8 | 0 | 4 | 4 | 0 | 1 | 3 | 3 | 1 | 1.000 | 0.951 | 1.000 | 0.406 | 0.785 | 0.027 |
gen_child_2 | gen_noskew | child | 0 | 16 | 0 | 4 | 0 | 4 | 0 | 4 | 0 | 4 | 0.000 | 0.952 | 0.000 | 0.000 | 0.789 | 0.000 |
gen_child_3 | gen_noskew | child | 0 | 15 | 0 | 4 | 0 | 3 | 0 | 4 | 0 | 4 | 0.000 | 0.949 | 0.000 | 0.000 | 0.763 | 0.000 |
gen_child_4 | gen_noskew | child | 8 | 6 | 3 | 1 | 2 | 2 | 2 | 1 | 1 | 2 | 0.985 | 0.948 | 0.772 | 0.911 | 0.728 | 0.935 |
gen_child_5 | gen_noskew | child | 0 | 13 | 0 | 4 | 0 | 4 | 0 | 4 | 0 | 1 | 0.000 | 0.942 | 0.001 | 0.000 | 0.717 | 0.003 |
gen_child_6 | gen_noskew | child | 16 | 0 | 4 | 0 | 4 | 0 | 4 | 0 | 4 | 0 | 0.000 | 0.952 | 0.000 | 0.000 | 0.779 | 0.000 |
gen_child_7 | gen_noskew | child | 7 | 8 | 3 | 1 | 1 | 3 | 2 | 1 | 1 | 3 | 0.997 | 0.951 | 1.000 | 0.833 | 0.772 | 0.560 |
gen_child_8 | gen_noskew | child | 10 | 6 | 3 | 1 | 3 | 1 | 2 | 2 | 2 | 2 | 0.954 | 0.947 | 0.491 | 0.906 | 0.777 | 0.869 |
gen_child_9 | gen_noskew | child | 5 | 11 | 1 | 3 | 2 | 2 | 0 | 4 | 2 | 2 | 0.896 | 0.950 | 0.236 | 0.703 | 0.774 | 0.393 |
mixed_adult_1 | mixed | adult | 2 | 6 | 0 | 0 | 0 | 0 | 2 | 2 | 0 | 4 | 0.811 | 0.907 | 0.283 | 0.500 | 0.776 | 0.237 |
mixed_adult_10 | mixed | adult | 5 | 3 | 0 | 0 | 0 | 0 | 3 | 1 | 2 | 2 | 0.954 | 0.907 | 0.720 | 0.906 | 0.781 | 0.871 |
mixed_adult_11 | mixed | adult | 5 | 3 | 0 | 0 | 0 | 0 | 2 | 2 | 3 | 1 | 0.954 | 0.903 | 0.722 | 0.906 | 0.775 | 0.865 |
mixed_adult_12 | mixed | adult | 4 | 4 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1.000 | 0.902 | 1.000 | 1.000 | 0.782 | 1.000 |
mixed_adult_13 | mixed | adult | 4 | 4 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1.000 | 0.905 | 1.000 | 1.000 | 0.776 | 1.000 |
mixed_adult_14 | mixed | adult | 2 | 6 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 3 | 0.811 | 0.909 | 0.280 | 0.811 | 0.789 | 0.469 |
mixed_adult_15 | mixed | adult | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 4 | 4 | 0 | 1.000 | 0.904 | 1.000 | 0.000 | 0.779 | 0.021 |
mixed_adult_2 | mixed | adult | 4 | 4 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1.000 | 0.898 | 1.000 | 1.000 | 0.774 | 1.000 |
mixed_adult_3 | mixed | adult | 3 | 5 | 0 | 0 | 0 | 0 | 2 | 2 | 1 | 3 | 0.954 | 0.906 | 0.729 | 0.906 | 0.787 | 0.854 |
mixed_adult_4 | mixed | adult | 3 | 5 | 0 | 0 | 0 | 0 | 1 | 3 | 2 | 2 | 0.954 | 0.895 | 0.752 | 0.906 | 0.780 | 0.855 |
mixed_adult_5 | mixed | adult | 5 | 3 | 0 | 0 | 0 | 0 | 2 | 2 | 3 | 1 | 0.954 | 0.907 | 0.736 | 0.906 | 0.785 | 0.864 |
mixed_adult_6 | mixed | adult | 5 | 3 | 0 | 0 | 0 | 0 | 3 | 1 | 2 | 2 | 0.954 | 0.898 | 0.708 | 0.906 | 0.786 | 0.862 |
mixed_adult_7 | mixed | adult | 5 | 3 | 0 | 0 | 0 | 0 | 2 | 2 | 3 | 1 | 0.954 | 0.902 | 0.737 | 0.906 | 0.777 | 0.862 |
mixed_adult_8 | mixed | adult | 3 | 5 | 0 | 0 | 0 | 0 | 1 | 3 | 2 | 2 | 0.954 | 0.905 | 0.736 | 0.906 | 0.787 | 0.846 |
mixed_adult_9 | mixed | adult | 3 | 5 | 0 | 0 | 0 | 0 | 2 | 2 | 1 | 3 | 0.954 | 0.905 | 0.729 | 0.906 | 0.788 | 0.845 |
mixed_child_1 | mixed | child | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 4 | 0.000 | 0.894 | 0.014 | 0.000 | 0.747 | 0.028 |
mixed_child_10 | mixed | child | 4 | 4 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1.000 | 0.897 | 1.000 | 1.000 | 0.783 | 1.000 |
mixed_child_11 | mixed | child | 5 | 3 | 0 | 0 | 0 | 0 | 2 | 3 | 3 | 0 | 0.954 | 0.899 | 0.764 | 0.607 | 0.775 | 0.301 |
mixed_child_12 | mixed | child | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 2 | 0.000 | 0.865 | 0.029 | 0.000 | 0.684 | 0.060 |
mixed_child_13 | mixed | child | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 3 | 0.971 | 0.831 | 1.000 | 0.649 | 0.616 | 0.630 |
mixed_child_14 | mixed | child | 8 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 4 | 0 | 0.000 | 0.903 | 0.006 | 0.000 | 0.776 | 0.017 |
mixed_child_15 | mixed | child | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 4 | 4 | 0 | 1.000 | 0.901 | 1.000 | 0.000 | 0.781 | 0.012 |
mixed_child_2 | mixed | child | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 4 | 4 | 0 | 1.000 | 0.905 | 1.000 | 0.000 | 0.788 | 0.016 |
mixed_child_3 | mixed | child | 8 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 4 | 0 | 0.000 | 0.890 | 0.017 | 0.000 | 0.767 | 0.025 |
mixed_child_4 | mixed | child | 1 | 7 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 4 | 0.544 | 0.908 | 0.068 | 0.406 | 0.783 | 0.138 |
mixed_child_5 | mixed | child | 3 | 5 | 0 | 0 | 0 | 0 | 1 | 3 | 2 | 2 | 0.954 | 0.903 | 0.725 | 0.906 | 0.796 | 0.828 |
mixed_child_6 | mixed | child | 8 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 4 | 0 | 0.000 | 0.902 | 0.007 | 0.000 | 0.790 | 0.008 |
mixed_child_7 | mixed | child | 6 | 1 | 0 | 0 | 0 | 0 | 4 | 0 | 2 | 1 | 0.592 | 0.888 | 0.121 | 0.394 | 0.727 | 0.134 |
mixed_child_8 | mixed | child | 4 | 4 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 4 | 1.000 | 0.905 | 1.000 | 0.000 | 0.779 | 0.017 |
mixed_child_9 | mixed | child | 4 | 4 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | 2 | 1.000 | 0.900 | 1.000 | 1.000 | 0.778 | 1.000 |
We can see that in the generalist condition, 5/15 children reduce total entropy below the level predicted by chance (and in fact each of these fully regularizes, using only one particle across all productions). A further 3/15 children show substantial usage of both particles but showed a greater level of lexical conditioning than predicted by chance (i.e. they used only, or mostly, one particle per noun).
In the mixed condition, 5/15 children reduce total entropy below the level predicted by chance, and in fact each of these fully regularizes, using only one particle across all productions. A further 3/15 children show substantial usage of both particles but showed a greater level of lexical conditioning than predicted by chance (i.e. they used only one particle per noun).
In the mixed condition, no adults reduce total entropy below the level predicted by chance, but one showed lexical conditioning.