Main analyses for Brown, Smith, Samara and Wonnacott (in submission). Data, .Rnd file and and supplmentary analyses at https://osf.io/sy8zr/

Set up

Load packages and functions

#library(languageR)
suppressPackageStartupMessages(library(lattice))
suppressPackageStartupMessages(library(lme4))
library(plotrix)
suppressPackageStartupMessages(library(irr))
library(knitr)
library(plyr)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.3

suppressPackageStartupMessages(library(reshape))

## Warning: package 'reshape' was built under R version 3.5.3

suppressPackageStartupMessages(library(reshape2))
library(WRS2)

## Warning: package 'WRS2' was built under R version 3.5.3

library(boot)

## 
## Attaching package: 'boot'

## The following object is masked from 'package:lattice':
## 
##     melanoma

library(Hmisc)

## Loading required package: survival

## 
## Attaching package: 'survival'

## The following object is masked from 'package:boot':
## 
##     aml

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

source('utilities.r')

Set up Theme for Figures

mytheme <- theme(axis.line = element_line(colour = "grey"),
        text = element_text(size=12),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.text.x = element_text(size=12, face="bold"),
        strip.background = element_rect(fill="white", colour = "white"))

#specify colours for the two type frequencies
low_freq_colour = "#EA7D00"
high_freq_colour = "#006DE9"

Production data

Set up and coding

Data Loading and Preparation

prod = read.csv("production.csv")

prod$session = as.factor(prod$session)
prod$awareness = as.factor(prod$awareness)
prod$typefreq= relevel(prod$typefreq, ref = "Low")
prod$age= relevel(prod$age, ref = "Child")

Inter-Rater Reliability for Coding

Use Cohen’s Kappa (used for nominal/categorical variables)

1 = perfect agreement
0 = random agreement
-1 = perfect disagreement

Landis & Koch (1977)

0.0-0.2 = slight agreement
0.21-0.40 = fair agreement
0.41-0.6 = moderate agreement
0.61-0.8 = substantial agreement
0.81-1.0 = perfect/near perfect agreement

Krippendorff (1980)

0.67-0.8 = allows tentative conclusions
0.81 or higher = allows definite conclusions

To compute Cohen’s Kappa we need to compare detHB and detAS columns for trials where there was a single phoneme error

prod$detHB = as.character(prod$detHB)
prod$detAS = as.character(prod$detAS)

# First select only items that were originally coded as "other" responses using a strict (fully correct vs. incorrect) criterion
IRR <- droplevels(subset(prod, det_used_logical=="other"))

# Then look at agreement between re-coding of these items between HB and AS
IRR <- cbind(IRR$detHB, IRR$detAS)
kappa2(IRR, weight = c("unweighted", "equal", "squared"), sort.levels = FALSE)

##  Cohen's Kappa for 2 Raters (Weights: unweighted)
## 
##  Subjects = 2478 
##    Raters = 2 
##     Kappa = 0.993 
## 
##         z = 65.6 
##   p-value = 0

Remove Trials which won’t be included in analyses

First remove trials where the noun produced was incorrect

prod1 <- droplevels(subset(prod, noun_correct=="1"))

Then remove trials in which the participant either did not produce a sentence-final particle or produced something other than one of the two trained particles (i.e. not coding as producing one of det1 or det2 )

prod1 <- droplevels(subset(prod1, detHB!="none"))
prod1 <- droplevels(subset(prod1, detHB!="other"))

Create Separate Dataframes for Children and Adults

prodCHILD <- droplevels(subset(prod1, age=="Child"))
prodADULT <- droplevels(subset(prod1, age=="Adult"))

Trained nouns

Plot - Figure 1

PRODOLD <- droplevels(subset(prod1, oldnew=="old"))

# Aggregate to obtain means per participant/day etc.
aggregated.PRODOLD = aggregate(det_correct ~ participant + awareness + session_fig + consistency_fig + typefreq + age, PRODOLD, FUN=mean)

# Re-order levels within the semantic consistency variable (the default is alphabetical: 1 = fully consistent, 2 = inconsistent, 3 = partially consistent; we want to swap the order of inconsistent/partially consistent)
aggregated.PRODOLD$consistency_fig = factor(aggregated.PRODOLD$consistency_fig, levels(aggregated.PRODOLD$consistency_fig)[c(1,3,2)])
levels(aggregated.PRODOLD$consistency_fig)

## [1] "Fully Consistent"     "Partially Consistent" "Inconsistent"

# Re-order levels within the age variable (the default is alphabetical: 1 = adult, 2 = child, we want to reverse this order)
aggregated.PRODOLD$age = factor(aggregated.PRODOLD$age,
levels(aggregated.PRODOLD$age)[c(2,1)])
levels(aggregated.PRODOLD$age)

## [1] "Adult" "Child"

#for plotting, setting low type frequency first
aggregated.PRODOLD$typefreq_fig <- aggregated.PRODOLD$typefreq
aggregated.PRODOLD$typefreq_fig <- relevel(aggregated.PRODOLD$typefreq_fig,ref="Low")

#In order to achieve plotting style we want (both session and typefreq on the xaxis), need to create a new factor combining type frequency and session
aggregated.PRODOLD$freq_session <- factor(paste(aggregated.PRODOLD$typefreq_fig,aggregated.PRODOLD$session_fig),
                                          levels=c("Low Session 1",
                                                   "Low Session 4",
                                                   "High Session 1", 
                                                   "High Session 4"))

#In order to have point fill style depending on both typefreq and awareness, need to combine those too
#First, coding everyone in Inconsistent as unaware
aggregated.PRODOLD$awareness2 <- ifelse(aggregated.PRODOLD$consistency_fig=="Inconsistent",0,ifelse(aggregated.PRODOLD$awareness==0,0,1))

aggregated.PRODOLD$typefreq_awareness <- factor(paste(aggregated.PRODOLD$typefreq_fig,aggregated.PRODOLD$awareness2))

#Set colours for points - points will differ in colour depending on type frequency and fill
#depending on awareness. Achieving this in ggplot is a bit fiddly!
aggregated.PRODOLD$colourfill <- ifelse(aggregated.PRODOLD$typefreq_awareness=="High 0","transparent",
                                        ifelse(aggregated.PRODOLD$typefreq_awareness=="High 1",high_freq_colour,
                                               ifelse(aggregated.PRODOLD$typefreq_awareness=="Low 0","transparent",
                                                      ifelse(aggregated.PRODOLD$typefreq_awareness=="Low 1",low_freq_colour,"green")))) 



# Need to re-order the colourfill values to match the ordering that is used in the plot facets, so that
# the colours line up with the points they belong to!
colourfill <- aggregated.PRODOLD$colourfill
colourfill <- colourfill[with(aggregated.PRODOLD,order(freq_session,consistency_fig,age,det_correct))]

#Finally, produce the plot
#Diamond and error bars for means
#dotplot for individuals
ggplot(aggregated.PRODOLD, aes(x = freq_session, y = det_correct, colour=typefreq_fig)) +
  stat_summary(fun.y = mean, geom = "point", colour='black',shape=18,size=3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width=.4,colour='black') +
  geom_dotplot(fill=colourfill,binaxis='y',stackdir='center') +
  geom_hline(aes(yintercept=0.50),linetype="dashed") + #chance
  coord_cartesian(ylim = c(0, 1)) + 
  facet_grid(consistency_fig ~ age) + 
  theme_bw() + mytheme +
  xlab("Session") + ylab("Proportion Correct") +
  scale_x_discrete(labels=c("1", "4", "1", "4")) +
  scale_colour_manual(values = c(low_freq_colour,high_freq_colour)) +
  guides(colour=guide_legend(title='Type Frequency', override.aes=list(fill=c(low_freq_colour,high_freq_colour)))) +
  theme(legend.position='bottom') +
  ggsave("_Figures/figure1.pdf",width=6,height=6)

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

#NB this this works for offsetting the error bars if we want to do that
  #stat_summary(fun.y = mean, geom = "point", colour='black',position=position_nudge(x=c(0.2,-0.2))) +
  #stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width=.4,colour='black',position=position_nudge(x=c(0.2,-0.2))) +

Main statistical Analyses

Children

Run a model with main with factors with two levels given centered numeric coding, and for the 3 level factor consistency use sequential constrasts comparing Fully Consistent to Partially Consistent and then Partially Consistent to Inconsistent.

Note, this is done using a handcoded function - it could also be done using the than MASS::contr.sdiff(3) (we checked and the results are the same). This handed coded function makes it easier for us to break down main effects and interactions within the same model.
Note that in this coding, the contrast Partial_v_Inconsistent is coded so that +ve means that inconsistent is greater than partial- this is flipped for the reporting so that it matches the direction of the prediction.

prodOLDchild <- droplevels(subset(prod1, oldnew=="old" & age == "Child"))

prodOLDchild = lizCenter(prodOLDchild, list("typefreq", "session"))
## creates versions of these factors with centered coding

prodOLDchild = setContrasts(prodOLDchild, prodOLDchild$consistency, "Partial")
## adds Partial_VS_Consistent+Partial_VS_Inconsistent constrasts

prodOLDchild.lmer = glmer(det_correct ~ (Partial_VS_Consistent+Partial_VS_Inconsistent) * typefreq.ct * session.ct + (1 + session.ct|participant), data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodOLDchild.lmer, digits = 3)

## MODEL INFO:
## Observations: 4908
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 5779.201, BIC = 5876.680
## Pseudo-R² (fixed effects) = 0.112
## Pseudo-R² (total) = 0.265 
## 
## FIXED EFFECTS:
## ---------------------------------------------------------------------
##                                                          Est.    S.E.
## ---------------------------------------------------- -------- -------
## (Intercept)                                             0.909   0.090
## Partial_VS_Consistent                                   0.666   0.219
## Partial_VS_Inconsistent                                -0.090   0.212
## typefreq.ct                                            -0.291   0.176
## session.ct                                              1.115   0.106
## Partial_VS_Consistent:typefreq.ct                      -0.324   0.434
## Partial_VS_Inconsistent:typefreq.ct                    -0.361   0.423
## Partial_VS_Consistent:session.ct                        0.898   0.255
## Partial_VS_Inconsistent:session.ct                      0.036   0.239
## typefreq.ct:session.ct                                 -0.371   0.204
## Partial_VS_Consistent:typefreq.ct:session.ct           -0.616   0.502
## Partial_VS_Inconsistent:typefreq.ct:session.ct         -0.974   0.477
## ---------------------------------------------------------------------
##  
## ---------------------------------------------------------------------
##                                                        z val.       p
## ---------------------------------------------------- -------- -------
## (Intercept)                                            10.133   0.000
## Partial_VS_Consistent                                   3.048   0.002
## Partial_VS_Inconsistent                                -0.427   0.669
## typefreq.ct                                            -1.650   0.099
## session.ct                                             10.498   0.000
## Partial_VS_Consistent:typefreq.ct                      -0.746   0.455
## Partial_VS_Inconsistent:typefreq.ct                    -0.854   0.393
## Partial_VS_Consistent:session.ct                        3.516   0.000
## Partial_VS_Inconsistent:session.ct                      0.153   0.879
## typefreq.ct:session.ct                                 -1.819   0.069
## Partial_VS_Consistent:typefreq.ct:session.ct           -1.226   0.220
## Partial_VS_Inconsistent:typefreq.ct:session.ct         -2.044   0.041
## ---------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.755   
##  participant   session.ct      0.685   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      90      0.148 
## --------------------------------

There is an effect of session in the first model. We now check whether performance is above chance in both sessions.

# First get means for each session
summarySEwithin(data = prodOLDchild, "det_correct", betweenvars = NULL, withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   session    N det_correct det_correct_norm        sd         se
## 1       1 2171   0.5660986        0.5717909 0.6866936 0.01473781
## 2       4 2737   0.7493606        0.7448455 0.5745576 0.01098237
##           ci
## 1 0.02890171
## 2 0.02153458

# re-run model, removing overall intercept + effect of session and replcing with two separate intercepts, one for each session  

prodOLDchild.lmer1a = glmer(det_correct ~ + session + (Partial_VS_Consistent+Partial_VS_Inconsistent) * typefreq.ct  * session.ct -1 - session.ct + (1 + session.ct|participant), data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#demonstrate this is the same model as before
anova(prodOLDchild.lmer, prodOLDchild.lmer1a)

## Data: prodOLDchild
## Models:
## prodOLDchild.lmer: det_correct ~ (Partial_VS_Consistent + Partial_VS_Inconsistent) * 
## prodOLDchild.lmer:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.lmer1a: det_correct ~ +session + (Partial_VS_Consistent + Partial_VS_Inconsistent) * 
## prodOLDchild.lmer1a:     typefreq.ct * session.ct - 1 - session.ct + (1 + session.ct | 
## prodOLDchild.lmer1a:     participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDchild.lmer   15 5779.2 5876.7 -2874.6   5749.2             
## prodOLDchild.lmer1a 15 5779.2 5876.7 -2874.6   5749.2     0      0
##                     Pr(>Chisq)    
## prodOLDchild.lmer                 
## prodOLDchild.lmer1a  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# print out the coefficients for the two intercepts 
get_coeffs(prodOLDchild.lmer1a, c("session1", "session4"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
session1	0.287	0.065	4.391	0
session4	1.403	0.127	11.077	0

More accurate in partial than inconsistent - look at the means:

summarySE(data = prodOLDchild, "det_correct", groupvars = "consistency", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##    consistency    N det_correct        sd         se         ci
## 1   Consistent 1638   0.7295482 0.4443289 0.01097862 0.02153361
## 2 Inconsistent 1580   0.6265823 0.4838648 0.01217294 0.02387682
## 3      Partial 1690   0.6479290 0.4777573 0.01162155 0.02279415

There is an interaction between the partial versus consistent contrast and the effect of session. Check whether there is an effect of this contrast in each of the two sessions

# First get means for each session
summarySEwithin(data = prodOLDchild, "det_correct", betweenvars = "consistency", withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##    consistency session   N det_correct det_correct_norm        sd
## 1   Consistent       1 731   0.5909713        0.5388601 0.6588449
## 2   Consistent       4 907   0.8412348        0.7726166 0.4914255
## 3 Inconsistent       1 668   0.5344311        0.5868609 0.7120325
## 4 Inconsistent       4 912   0.6940789        0.7279447 0.6130383
## 5      Partial       1 772   0.5699482        0.5899328 0.6888249
## 6      Partial       4 918   0.7135076        0.7341974 0.6088264
##           se         ci
## 1 0.02436826 0.04784023
## 2 0.01631752 0.03202452
## 3 0.02754937 0.05409392
## 4 0.02029973 0.03983966
## 5 0.02479135 0.04866655
## 6 0.02009427 0.03943609

# re-run model, removing overall contrast of Partial vs consistent averaged across sessions and interaction between this and session, and replacing with effect of contrast for each level of session  

prodOLDchild.lmer1b = glmer(det_correct ~ + Partial_VS_Consistent:session + (Partial_VS_Consistent+Partial_VS_Inconsistent) * typefreq.ct * session.ct -Partial_VS_Consistent - Partial_VS_Consistent:session.ct + (1 + session.ct|participant), data = prodOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

# check it is the same model
anova(prodOLDchild.lmer, prodOLDchild.lmer1b)

## Data: prodOLDchild
## Models:
## prodOLDchild.lmer: det_correct ~ (Partial_VS_Consistent + Partial_VS_Inconsistent) * 
## prodOLDchild.lmer:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDchild.lmer1b: det_correct ~ +Partial_VS_Consistent:session + (Partial_VS_Consistent + 
## prodOLDchild.lmer1b:     Partial_VS_Inconsistent) * typefreq.ct * session.ct - Partial_VS_Consistent - 
## prodOLDchild.lmer1b:     Partial_VS_Consistent:session.ct + (1 + session.ct | participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDchild.lmer   15 5779.2 5876.7 -2874.6   5749.2             
## prodOLDchild.lmer1b 15 5779.2 5876.7 -2874.6   5749.2     0      0
##                     Pr(>Chisq)    
## prodOLDchild.lmer                 
## prodOLDchild.lmer1b  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# print out the coefficients for the effect of Partial vs consistent at each level of session
get_coeffs(prodOLDchild.lmer1b, c("Partial_VS_Consistent:session1", "Partial_VS_Consistent:session4"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
Partial_VS_Consistent:session1	0.165	0.159	1.038	0.299
Partial_VS_Consistent:session4	1.063	0.308	3.457	0.001

Adults

prodOLDadult <- droplevels(subset(prodADULT, oldnew=="old"))
prodOLDadult <- lizCenter(prodOLDadult, list("typefreq", "session"))
# creates versions of these factors with centered coding
prodOLDadult = setContrasts(prodOLDadult, prodOLDadult$consistency, "Partial")
## adds columns for the contrasts Partial_VS_Consistent and Partial_VS_Inconsistent 

prodOLDadult.lmer = glmer(det_correct ~       (Partial_VS_Consistent+Partial_VS_Inconsistent)  * typefreq.ct * session.ct + (1 + session.ct|participant),data = prodOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodOLDadult.lmer, digits = 3)

## MODEL INFO:
## Observations: 3728
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 2082.140, BIC = 2175.494
## Pseudo-R² (fixed effects) = 0.360
## Pseudo-R² (total) = 0.727 
## 
## FIXED EFFECTS:
## ---------------------------------------------------------------------
##                                                          Est.    S.E.
## ---------------------------------------------------- -------- -------
## (Intercept)                                             3.843   0.380
## Partial_VS_Consistent                                   2.460   0.761
## Partial_VS_Inconsistent                                 0.290   0.666
## typefreq.ct                                            -1.193   0.586
## session.ct                                              3.123   0.626
## Partial_VS_Consistent:typefreq.ct                      -0.413   1.516
## Partial_VS_Inconsistent:typefreq.ct                    -1.355   1.327
## Partial_VS_Consistent:session.ct                        0.997   1.102
## Partial_VS_Inconsistent:session.ct                      1.409   0.858
## typefreq.ct:session.ct                                  0.709   0.800
## Partial_VS_Consistent:typefreq.ct:session.ct            2.140   2.142
## Partial_VS_Inconsistent:typefreq.ct:session.ct         -1.214   1.716
## ---------------------------------------------------------------------
##  
## ---------------------------------------------------------------------
##                                                        z val.       p
## ---------------------------------------------------- -------- -------
## (Intercept)                                            10.124   0.000
## Partial_VS_Consistent                                   3.232   0.001
## Partial_VS_Inconsistent                                 0.436   0.663
## typefreq.ct                                            -2.037   0.042
## session.ct                                              4.992   0.000
## Partial_VS_Consistent:typefreq.ct                      -0.272   0.785
## Partial_VS_Inconsistent:typefreq.ct                    -1.021   0.307
## Partial_VS_Consistent:session.ct                        0.904   0.366
## Partial_VS_Inconsistent:session.ct                      1.643   0.100
## typefreq.ct:session.ct                                  0.885   0.376
## Partial_VS_Consistent:typefreq.ct:session.ct            0.999   0.318
## Partial_VS_Inconsistent:typefreq.ct:session.ct         -0.708   0.479
## ---------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.853   
##  participant   session.ct      2.002   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      60      0.511 
## --------------------------------

There is an effect of session in the first model. We now check whether performance is above chance in both sessions.

# First get means for each session
summarySEwithin(data = prodOLDadult, "det_correct", betweenvars = NULL, withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   session    N det_correct det_correct_norm        sd          se
## 1       1 1815   0.8126722        0.8129596 0.5012923 0.011766647
## 2       4 1913   0.9409305        0.9406577 0.3101091 0.007090175
##           ci
## 1 0.02307760
## 2 0.01390529

# re-run model, removing overall intercept + effect of session and replcing with two separate intercepts, one for each session  

prodOLDadult.lmer2c = glmer(det_correct ~ - 1 + session + (Partial_VS_Consistent+Partial_VS_Inconsistent)* typefreq.ct * session.ct - session.ct  + (1 + session.ct|participant), data = prodOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

# check it is the same model as above
anova(prodOLDadult.lmer, prodOLDadult.lmer2c)

## Data: prodOLDadult
## Models:
## prodOLDadult.lmer: det_correct ~ (Partial_VS_Consistent + Partial_VS_Inconsistent) * 
## prodOLDadult.lmer:     typefreq.ct * session.ct + (1 + session.ct | participant)
## prodOLDadult.lmer2c: det_correct ~ -1 + session + (Partial_VS_Consistent + Partial_VS_Inconsistent) * 
## prodOLDadult.lmer2c:     typefreq.ct * session.ct - session.ct + (1 + session.ct | 
## prodOLDadult.lmer2c:     participant)
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodOLDadult.lmer   15 2082.1 2175.5 -1026.1   2052.1             
## prodOLDadult.lmer2c 15 2082.1 2175.5 -1026.1   2052.1     0      0
##                     Pr(>Chisq)
## prodOLDadult.lmer             
## prodOLDadult.lmer2c          1

#print out coefficients for each session
get_coeffs(prodOLDadult.lmer2c, c("session1", "session4"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
session1	2.240	0.214	10.485	0
session4	5.363	0.655	8.193	0

# means for each level of type frequency
summarySEwithin(data = prodOLDadult, "det_correct", betweenvars = NULL, withinvars = "typefreq", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   typefreq    N det_correct det_correct_norm        sd          se
## 1     High 1883   0.8343070        0.8784871 0.4900711 0.011293639
## 2      Low 1845   0.9235772        0.8784871 0.3439084 0.008006535
##           ci
## 1 0.02214937
## 2 0.01570283

# means for each level of type frequency
summarySEwithin(data = prodOLDadult, "det_correct", betweenvars = NULL, withinvars = "typefreq", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   typefreq    N det_correct det_correct_norm        sd          se
## 1     High 1883   0.8343070        0.8784871 0.4900711 0.011293639
## 2      Low 1845   0.9235772        0.8784871 0.3439084 0.008006535
##           ci
## 1 0.02214937
## 2 0.01570283

More accurate in partial than inconsistent - look at the means:

summarySE(data = prodOLDadult, "det_correct", groupvars = "consistency", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##    consistency    N det_correct        sd          se         ci
## 1   Consistent 1253   0.9449322 0.2282037 0.006446844 0.01264781
## 2 Inconsistent 1274   0.8343799 0.3718852 0.010418957 0.02044021
## 3      Partial 1201   0.8559534 0.3512833 0.010136452 0.01988714

Comparing children and adults

An omnibus model comparing children and adults- coding as above

prodOLDboth <- droplevels(subset(prod1, oldnew=="old"))


prodOLDboth <- lizCenter(prodOLDboth, list("typefreq", "session", "age"))
# creates versions of these factors with centered coding

prodOLDboth = setContrasts(prodOLDboth, prodOLDboth$consistency, "Partial")
## adds columns for the contrasts Partial_VS_Consistent and Partial_VS_Inconsistent 

prodOLDboth.lmer = glmer(det_correct ~ (Partial_VS_Consistent + Partial_VS_Inconsistent) * typefreq.ct * session.ct * age.ct + (1 + session.ct|participant), data = prodOLDboth, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodOLDboth.lmer, digits = 3)

## MODEL INFO:
## Observations: 8636
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 7895.511, BIC = 8086.231
## Pseudo-R² (fixed effects) = 0.372
## Pseudo-R² (total) = 0.563 
## 
## FIXED EFFECTS:
## --------------------------------------------------------------------
##                                                                 Est.
## ----------------------------------------------------------- --------
## (Intercept)                                                    1.959
## Partial_VS_Consistent                                          1.328
## Partial_VS_Inconsistent                                        0.052
## typefreq.ct                                                   -0.660
## session.ct                                                     1.671
## age.ct                                                         2.348
## Partial_VS_Consistent:typefreq.ct                             -0.571
## Partial_VS_Inconsistent:typefreq.ct                           -0.698
## Partial_VS_Consistent:session.ct                               0.907
## Partial_VS_Inconsistent:session.ct                             0.543
## typefreq.ct:session.ct                                         0.115
## Partial_VS_Consistent:age.ct                                   1.342
## Partial_VS_Inconsistent:age.ct                                 0.313
## typefreq.ct:age.ct                                            -0.763
## session.ct:age.ct                                              1.144
## Partial_VS_Consistent:typefreq.ct:session.ct                   0.358
## Partial_VS_Inconsistent:typefreq.ct:session.ct                -0.946
## Partial_VS_Consistent:typefreq.ct:age.ct                      -0.435
## Partial_VS_Inconsistent:typefreq.ct:age.ct                    -0.776
## Partial_VS_Consistent:session.ct:age.ct                       -0.147
## Partial_VS_Inconsistent:session.ct:age.ct                      1.181
## typefreq.ct:session.ct:age.ct                                  1.195
## Partial_VS_Consistent:typefreq.ct:session.ct:age.ct            2.497
## Partial_VS_Inconsistent:typefreq.ct:session.ct:age.ct          0.200
## --------------------------------------------------------------------
##  
## -------------------------------------------------------------------
##                                                                S.E.
## ----------------------------------------------------------- -------
## (Intercept)                                                   0.118
## Partial_VS_Consistent                                         0.274
## Partial_VS_Inconsistent                                       0.246
## typefreq.ct                                                   0.215
## session.ct                                                    0.159
## age.ct                                                        0.237
## Partial_VS_Consistent:typefreq.ct                             0.539
## Partial_VS_Inconsistent:typefreq.ct                           0.492
## Partial_VS_Consistent:session.ct                              0.346
## Partial_VS_Inconsistent:session.ct                            0.272
## typefreq.ct:session.ct                                        0.261
## Partial_VS_Consistent:age.ct                                  0.567
## Partial_VS_Inconsistent:age.ct                                0.509
## typefreq.ct:age.ct                                            0.451
## session.ct:age.ct                                             0.312
## Partial_VS_Consistent:typefreq.ct:session.ct                  0.670
## Partial_VS_Inconsistent:typefreq.ct:session.ct                0.544
## Partial_VS_Consistent:typefreq.ct:age.ct                      1.128
## Partial_VS_Inconsistent:typefreq.ct:age.ct                    1.007
## Partial_VS_Consistent:session.ct:age.ct                       0.728
## Partial_VS_Inconsistent:session.ct:age.ct                     0.574
## typefreq.ct:session.ct:age.ct                                 0.564
## Partial_VS_Consistent:typefreq.ct:session.ct:age.ct           1.465
## Partial_VS_Inconsistent:typefreq.ct:session.ct:age.ct         1.138
## -------------------------------------------------------------------
##  
## ----------------------------------------------------------------------------
##                                                               z val.       p
## ----------------------------------------------------------- -------- -------
## (Intercept)                                                   16.602   0.000
## Partial_VS_Consistent                                          4.853   0.000
## Partial_VS_Inconsistent                                        0.210   0.834
## typefreq.ct                                                   -3.068   0.002
## session.ct                                                    10.481   0.000
## age.ct                                                         9.924   0.000
## Partial_VS_Consistent:typefreq.ct                             -1.061   0.289
## Partial_VS_Inconsistent:typefreq.ct                           -1.419   0.156
## Partial_VS_Consistent:session.ct                               2.623   0.009
## Partial_VS_Inconsistent:session.ct                             1.998   0.046
## typefreq.ct:session.ct                                         0.441   0.659
## Partial_VS_Consistent:age.ct                                   2.366   0.018
## Partial_VS_Inconsistent:age.ct                                 0.615   0.538
## typefreq.ct:age.ct                                            -1.693   0.090
## session.ct:age.ct                                              3.673   0.000
## Partial_VS_Consistent:typefreq.ct:session.ct                   0.535   0.593
## Partial_VS_Inconsistent:typefreq.ct:session.ct                -1.740   0.082
## Partial_VS_Consistent:typefreq.ct:age.ct                      -0.385   0.700
## Partial_VS_Inconsistent:typefreq.ct:age.ct                    -0.771   0.441
## Partial_VS_Consistent:session.ct:age.ct                       -0.201   0.840
## Partial_VS_Inconsistent:session.ct:age.ct                      2.056   0.040
## typefreq.ct:session.ct:age.ct                                  2.117   0.034
## Partial_VS_Consistent:typefreq.ct:session.ct:age.ct            1.705   0.088
## Partial_VS_Inconsistent:typefreq.ct:session.ct:age.ct          0.175   0.861
## ----------------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.101   
##  participant   session.ct      0.951   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant     150      0.269 
## --------------------------------

Novel nouns

Plot - Figure 2

PRODNEW <- droplevels(subset(prod1, oldnew =="new" & consistency!="Inconsistent"))

# Aggregate to obtain means per participant/day etc.
aggregated.PRODNEW = aggregate(det_correct ~ participant + awareness + session_fig + consistency_fig + typefreq + age, PRODNEW, FUN=mean)

#re-order levels of the semantic consistency variable
aggregated.PRODNEW$consistency_fig = factor(aggregated.PRODNEW$consistency_fig, levels(aggregated.PRODNEW$consistency_fig)[c(1,3,2)])
levels(aggregated.PRODNEW$consistency_fig)

## [1] "Fully Consistent"     "Partially Consistent"

# Re-order levels within the age variable (the default is alphabetical: 1 = adult, 2 = child, we want to reverse this order)
aggregated.PRODNEW$age = factor(aggregated.PRODNEW$age, levels(aggregated.PRODNEW$age)[c(2,1)])
levels(aggregated.PRODNEW$age)

## [1] "Adult" "Child"

#for plotting, setting low type frequency first
aggregated.PRODNEW$typefreq_fig <- aggregated.PRODNEW$typefreq
aggregated.PRODNEW$typefreq_fig <- relevel(aggregated.PRODNEW$typefreq_fig,ref="Low")

#New factor combining type frequency and session
aggregated.PRODNEW$freq_session <- factor(paste(aggregated.PRODNEW$typefreq_fig,aggregated.PRODNEW$session_fig),
                                          levels=c("Low Session 1","Low Session 4","High Session 1", "High Session 4"))

#Combine typefreq and awareness, coding everyone in Inconsistent as unaware first
aggregated.PRODNEW$awareness2 <- ifelse(aggregated.PRODNEW$consistency_fig=="Inconsistent",0,ifelse(aggregated.PRODNEW$awareness==0,0,1))

aggregated.PRODNEW$typefreq_awareness <- factor(paste(aggregated.PRODNEW$typefreq_fig,aggregated.PRODNEW$awareness2))


aggregated.PRODNEW$colourfill <- ifelse(aggregated.PRODNEW$typefreq_awareness=="High 0","transparent",
                                        ifelse(aggregated.PRODNEW$typefreq_awareness=="High 1",high_freq_colour,
                                               ifelse(aggregated.PRODNEW$typefreq_awareness=="Low 0","transparent",
                                                      ifelse(aggregated.PRODNEW$typefreq_awareness=="Low 1",low_freq_colour,"green")))) 

#order colourfill to match order used in plots
colourfill <- aggregated.PRODNEW$colourfill
colourfill <- colourfill[with(aggregated.PRODNEW,order(freq_session,consistency_fig,age,det_correct))]

#plot it
ggplot(aggregated.PRODNEW, aes(x = freq_session, y = det_correct, colour=typefreq_fig)) +
  stat_summary(fun.y = mean, geom = "point", colour='black',shape=18,size=3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width=.4,colour='black') +
  geom_dotplot(fill=colourfill,binaxis='y',stackdir='center') +
  geom_hline(aes(yintercept=0.50),linetype="dashed") +
  facet_grid(consistency_fig ~ age) + 
  theme_bw() + mytheme +
  coord_cartesian(ylim = c(0, 1)) + 
  xlab("Session") + ylab("Proportion Correct") +
  scale_x_discrete(labels=c("1", "4", "1", "4")) +
  scale_colour_manual(values = c(low_freq_colour,high_freq_colour)) +
  guides(colour=guide_legend(title='Type Frequency', override.aes=list(fill=c(low_freq_colour,high_freq_colour)))) +
  theme(legend.position="bottom") +
  ggsave("_Figures/figure2.pdf",width=6,height=6)

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

Statistical Analyses

Note: For novel nouns we only include data from the fully consistent and partially consistent conditions. The inconsistent condition cannot be included since there is no “correct” (or majority) particle for semantic category, so no way to score accuracy of particle usage for novel nouns.

Children

Run model- all factors have two levels and are given centered numeric coding.

prodNEWchild <- droplevels(subset(prod1, age == "Child" & oldnew == "new" & consistency != "Inconsistent"))

prodNEWchild$consistency= relevel(prodNEWchild$consistency,ref = "Partial" )
prodNEWchild <- lizCenter(prodNEWchild, list("consistency", "typefreq", "session"))
## creates versions of these factors with centered coding

prodNEWchild.lmer1 = glmer(det_correct ~ + consistency.ct * typefreq.ct * session.ct + (1 + session.ct|participant), data = prodNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodNEWchild.lmer1, digits = 3)

## MODEL INFO:
## Observations: 3215
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 4162.001, BIC = 4228.832
## Pseudo-R² (fixed effects) = 0.045
## Pseudo-R² (total) = 0.189 
## 
## FIXED EFFECTS:
## -----------------------------------------------------------------------------
##                                                 Est.    S.E.   z val.       p
## ------------------------------------------- -------- ------- -------- -------
## (Intercept)                                    0.360   0.105    3.425   0.001
## consistency.ct                                 0.663   0.210    3.155   0.002
## typefreq.ct                                    0.135   0.209    0.644   0.520
## session.ct                                     0.388   0.093    4.146   0.000
## consistency.ct:typefreq.ct                    -0.377   0.419   -0.899   0.368
## consistency.ct:session.ct                      0.504   0.186    2.706   0.007
## typefreq.ct:session.ct                         0.127   0.182    0.700   0.484
## consistency.ct:typefreq.ct:session.ct          0.234   0.365    0.640   0.522
## -----------------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.744   
##  participant   session.ct      0.369   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      60      0.144 
## --------------------------------

Break down the interaction between semantic consistency and session. Figure 2 suggests that the improvement in performance between sessions is greater in the fully consistent condition.

# Get means for each session
summarySEwithin(data = prodNEWchild, "det_correct", betweenvars = NULL, withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   session    N det_correct det_correct_norm        sd         se
## 1       1 1452   0.5275482        0.5318962 0.6805480 0.01785975
## 2       4 1763   0.6006807        0.5970997 0.6436599 0.01532958
##           ci
## 1 0.03503370
## 2 0.03006608

# Get means for each consistency condition
summarySEwithin(data = prodNEWchild, "det_correct", betweenvars = "consistency", withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency    N det_correct det_correct_norm  sd  se  ci
## 1  Consistent 1567   0.6317805        0.5676516 Inf Inf Inf
## 2     Partial 1648   0.5066748        0.5676516 Inf Inf Inf

# Get means split by both session and consistency
summarySEwithin(data = prodNEWchild, "det_correct", betweenvars = "consistency", withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency session   N det_correct det_correct_norm        sd
## 1  Consistent       1 711   0.5668073        0.5129356 0.6691768
## 2  Consistent       4 856   0.6857477        0.6130992 0.5943419
## 3     Partial       1 741   0.4898785        0.5500891 0.6907558
## 4     Partial       4 907   0.5203969        0.5819998 0.6866106
##           se         ci
## 1 0.02509609 0.04927142
## 2 0.02031419 0.03987152
## 3 0.02537555 0.04981664
## 4 0.02279853 0.04474407

# Run version of model with the main effect of seesion and interaction between session and consistency removed and replaced with  effect of session at each level of consistency
prodNEWchild.lmer2 = glmer(det_correct ~ + session.ct:consistency + (consistency.ct * typefreq.ct * session.ct) - session.ct - session.ct:consistency.ct + (1 + session.ct|participant), data = prodNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#check is the same model
anova(prodNEWchild.lmer1, prodNEWchild.lmer2)

## Data: prodNEWchild
## Models:
## prodNEWchild.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWchild.lmer1:     session.ct | participant)
## prodNEWchild.lmer2: det_correct ~ +session.ct:consistency + (consistency.ct * typefreq.ct * 
## prodNEWchild.lmer2:     session.ct) - session.ct - session.ct:consistency.ct + (1 + 
## prodNEWchild.lmer2:     session.ct | participant)
##                    Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## prodNEWchild.lmer1 11 4162 4228.8  -2070     4140                        
## prodNEWchild.lmer2 11 4162 4228.8  -2070     4140     0      0          1

# print out coefficients for the effect of session for each of the consistent and partial conditions
get_coeffs(prodNEWchild.lmer2, c("session.ct:consistencyConsistent", "session.ct:consistencyPartial"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
session.ct:consistencyConsistent	0.646	0.141	4.596	0.000
session.ct:consistencyPartial	0.142	0.123	1.149	0.251

Are they above chance in each condition in each session?

#Re-fit the model removing the intercept, main effects of session and consistency, and #session by consistency interaction, adding in an intercept for each session in each #condition. 

prodNEWchild.lmer3 = glmer(det_correct ~ - 1 + session: consistency + (consistency.ct * typefreq.ct * session.ct) - session.ct * consistency.ct + (1 + session.ct|participant),
data = prodNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#check its the same model
anova(prodNEWchild.lmer1, prodNEWchild.lmer3)

## Data: prodNEWchild
## Models:
## prodNEWchild.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWchild.lmer1:     session.ct | participant)
## prodNEWchild.lmer3: det_correct ~ -1 + session:consistency + (consistency.ct * typefreq.ct * 
## prodNEWchild.lmer3:     session.ct) - session.ct * consistency.ct + (1 + session.ct | 
## prodNEWchild.lmer3:     participant)
##                    Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## prodNEWchild.lmer1 11 4162 4228.8  -2070     4140                        
## prodNEWchild.lmer3 11 4162 4228.8  -2070     4140     0      0          1

#print out coefficients for four intercepts,one for each level of session and consistency
get_coeffs(prodNEWchild.lmer3, c("session1:consistencyConsistent", "session4:consistencyConsistent", "session1:consistencyPartial", "session4:consistencyPartial"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
session1:consistencyConsistent	0.345	0.130	2.663	0.008
session4:consistencyConsistent	0.991	0.192	5.154	0.000
session1:consistencyPartial	-0.041	0.125	-0.326	0.745
session4:consistencyPartial	0.101	0.181	0.559	0.576

Adults

Run model- all factors have two levels and are given centered numeric coding.

prodNEWadult <- droplevels(subset(prod1, age == "Adult" & oldnew == "new" & consistency != "Inconsistent"))

prodNEWadult$consistency = relevel(prodNEWadult$consistency, ref = "Partial")

prodNEWadult <- lizCenter(prodNEWadult, list("consistency", "typefreq", "session"))
## creates versions of these factors with centered coding

prodNEWadult.lmer1 = glmer(det_correct ~    + consistency.ct * typefreq.ct * session.ct  + (1 + session.ct|participant), data = prodNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodNEWadult.lmer1, digits = 3)

## MODEL INFO:
## Observations: 2436
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 1327.707, BIC = 1391.487
## Pseudo-R² (fixed effects) = 0.287
## Pseudo-R² (total) = 0.808 
## 
## FIXED EFFECTS:
## -----------------------------------------------------------------------------
##                                                 Est.    S.E.   z val.       p
## ------------------------------------------- -------- ------- -------- -------
## (Intercept)                                    4.311   0.634    6.801   0.000
## consistency.ct                                 3.443   1.030    3.342   0.001
## typefreq.ct                                   -0.277   1.010   -0.275   0.784
## session.ct                                     2.216   1.023    2.165   0.030
## consistency.ct:typefreq.ct                    -1.622   1.987   -0.816   0.414
## consistency.ct:session.ct                      1.892   1.429    1.323   0.186
## typefreq.ct:session.ct                         1.619   1.352    1.198   0.231
## consistency.ct:typefreq.ct:session.ct          3.926   2.606    1.506   0.132
## -----------------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     2.632   
##  participant   session.ct      2.812   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      40      0.678 
## --------------------------------

Are they above chance in each condition in each session?

# Get means for each session
summarySEwithin(data = prodNEWadult, "det_correct", betweenvars = NULL, withinvars = "session", idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   session    N det_correct det_correct_norm        sd         se
## 1       1 1168   0.7859589        0.7908021 0.4546624 0.01330355
## 2       4 1268   0.8761830        0.8717217 0.4092315 0.01149237
##           ci
## 1 0.02610155
## 2 0.02254616

# Get means for each consistency condition
summarySEwithin(data = prodNEWadult, "det_correct", betweenvars = "consistency", withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency    N det_correct det_correct_norm  sd  se  ci
## 1  Consistent 1250   0.9120000        0.8329228 Inf Inf Inf
## 2     Partial 1186   0.7495784        0.8329228 Inf Inf Inf

#Re-fit the model removing the intercept, main effects of session and consistency, and #session by consistency interaction, adding in an intercept for each session in each #condition. 

prodNEWadult.lmer2 = glmer(det_correct ~ - 1  + session:consistency + (consistency.ct * typefreq.ct * session.ct) - session.ct * consistency.ct  + (1 + session.ct|participant), data = prodNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#check it is the same model
anova(prodNEWadult.lmer1, prodNEWadult.lmer2)

## Data: prodNEWadult
## Models:
## prodNEWadult.lmer1: det_correct ~ +consistency.ct * typefreq.ct * session.ct + (1 + 
## prodNEWadult.lmer1:     session.ct | participant)
## prodNEWadult.lmer2: det_correct ~ -1 + session:consistency + (consistency.ct * typefreq.ct * 
## prodNEWadult.lmer2:     session.ct) - session.ct * consistency.ct + (1 + session.ct | 
## prodNEWadult.lmer2:     participant)
##                    Df    AIC    BIC  logLik deviance Chisq Chi Df
## prodNEWadult.lmer1 11 1327.7 1391.5 -652.85   1305.7             
## prodNEWadult.lmer2 11 1327.7 1391.5 -652.85   1305.7     0      0
##                    Pr(>Chisq)
## prodNEWadult.lmer1           
## prodNEWadult.lmer2          1

# print out coefficients for four intercepts, one for each level of consistency in each session

get_coeffs(prodNEWadult.lmer2,c("session1:consistencyConsistent","session4:consistencyConsistent", "session1:consistencyPartial", "session4:consistencyPartial"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
session1:consistencyConsistent	4.354	0.833	5.224	0.000
session4:consistencyConsistent	7.491	1.456	5.146	0.000
session1:consistencyPartial	1.896	0.656	2.890	0.004
session4:consistencyPartial	3.141	0.928	3.383	0.001

Comparing children and adults

An omnibus model comparing children and adults- coding as for child/adult models but with age and all interactions with age

prodNEWboth <- droplevels(subset(prod1, oldnew=="new" & consistency != "Inconsistent"))
prodNEWboth$consistency = relevel(prodNEWboth$consistency, ref="Partial")

prodNEWboth <- lizCenter(prodNEWboth, list("consistency","typefreq", "session", "age"))
## creates versions of these factors with centered coding

prodNEWboth.lmer1 = glmer(det_correct ~  + consistency.ct * typefreq.ct * session.ct * age.ct + (1 + session.ct|participant),data = prodNEWboth, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodNEWboth.lmer1, digits = 3)

## MODEL INFO:
## Observations: 5651
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 5580.204, BIC = 5706.356
## Pseudo-R² (fixed effects) = 0.369
## Pseudo-R² (total) = 0.607 
## 
## FIXED EFFECTS:
## -------------------------------------------------------------------
##                                                        Est.    S.E.
## -------------------------------------------------- -------- -------
## (Intercept)                                           1.588   0.160
## consistency.ct                                        1.528   0.307
## typefreq.ct                                          -0.170   0.305
## session.ct                                            0.804   0.183
## age.ct                                                2.727   0.330
## consistency.ct:typefreq.ct                           -0.872   0.607
## consistency.ct:session.ct                             0.910   0.321
## typefreq.ct:session.ct                                0.536   0.314
## consistency.ct:age.ct                                 1.766   0.636
## typefreq.ct:age.ct                                   -0.696   0.636
## session.ct:age.ct                                     0.945   0.380
## consistency.ct:typefreq.ct:session.ct                 1.346   0.621
## consistency.ct:typefreq.ct:age.ct                    -1.117   1.256
## consistency.ct:session.ct:age.ct                      0.997   0.680
## typefreq.ct:session.ct:age.ct                         0.922   0.679
## consistency.ct:typefreq.ct:session.ct:age.ct          2.501   1.339
## -------------------------------------------------------------------
##  
## -------------------------------------------------------------------
##                                                      z val.       p
## -------------------------------------------------- -------- -------
## (Intercept)                                           9.949   0.000
## consistency.ct                                        4.978   0.000
## typefreq.ct                                          -0.557   0.577
## session.ct                                            4.387   0.000
## age.ct                                                8.264   0.000
## consistency.ct:typefreq.ct                           -1.436   0.151
## consistency.ct:session.ct                             2.830   0.005
## typefreq.ct:session.ct                                1.707   0.088
## consistency.ct:age.ct                                 2.775   0.006
## typefreq.ct:age.ct                                   -1.094   0.274
## session.ct:age.ct                                     2.487   0.013
## consistency.ct:typefreq.ct:session.ct                 2.166   0.030
## consistency.ct:typefreq.ct:age.ct                    -0.889   0.374
## consistency.ct:session.ct:age.ct                      1.466   0.143
## typefreq.ct:session.ct:age.ct                         1.357   0.175
## consistency.ct:typefreq.ct:session.ct:age.ct          1.867   0.062
## -------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.332   
##  participant   session.ct      0.933   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant     100      0.350 
## --------------------------------

Two-Alternative Forced Choice (2AFC) Test

Set up and coding

afc = read.csv("afc.csv")
levels(afc$typefreq)

## [1] "High" "Low"

afc$typefreq= relevel(afc$typefreq, ref ="Low")
afc$age= relevel(afc$age, ref ="Child")

Trained Nouns

Plot- Figure 1

afcOLD <- droplevels(subset(afc, oldnew == "old"))

# Aggregate to obtain means per participant/day etc.
aggregated.afcOLD = aggregate(correct ~ participant + awareness + consistency_fig + typefreq + age, afcOLD, FUN=mean)

# Re-order levels of the semantic consistency variable (the defauly is alphabetical)
aggregated.afcOLD$consistency_fig = factor(aggregated.afcOLD$consistency_fig,levels(aggregated.afcOLD$consistency_fig)[c(1,3,2)])
levels(aggregated.afcOLD$consistency_fig)

## [1] "Fully Consistent"     "Partially Consistent" "Inconsistent"

# Re-order levels of the age variable (the defauly is alphabetical)
aggregated.afcOLD$age = factor(aggregated.afcOLD$age, levels(aggregated.afcOLD$age)[c(2,1)])
levels(aggregated.afcOLD$age)

## [1] "Adult" "Child"

#for plotting, setting low type frequency first
aggregated.afcOLD$typefreq_fig <- aggregated.afcOLD$typefreq
aggregated.afcOLD$typefreq_fig <- relevel(aggregated.afcOLD$typefreq_fig,ref="Low")


#In order to have point fill style depending on both typefreq and awareness, need to combine those.
#First, coding everyone in Inconsistent as unaware.
aggregated.afcOLD$awareness2 <- ifelse(aggregated.afcOLD$consistency_fig=="Inconsistent",0,ifelse(aggregated.afcOLD$awareness==0,0,1))

aggregated.afcOLD$typefreq_awareness <- factor(paste(aggregated.afcOLD$typefreq_fig,aggregated.afcOLD$awareness2))

aggregated.afcOLD$colourfill <- ifelse(aggregated.afcOLD$typefreq_awareness=="High 0","transparent",
                                        ifelse(aggregated.afcOLD$typefreq_awareness=="High 1",high_freq_colour,
                                               ifelse(aggregated.afcOLD$typefreq_awareness=="Low 0","transparent",
                                                      ifelse(aggregated.afcOLD$typefreq_awareness=="Low 1",low_freq_colour,"green")))) 

colourfill <- aggregated.afcOLD$colourfill
colourfill <- colourfill[with(aggregated.afcOLD,order(typefreq_fig,consistency_fig,age,correct))]


ggplot(aggregated.afcOLD, aes(x = typefreq_fig, y = correct, colour=typefreq_fig)) +
  stat_summary(fun.y = mean, geom = "point", colour='black',shape=18,size=3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width=.4,colour='black') +
  geom_dotplot(fill=colourfill,binaxis='y',stackdir='center') +
  geom_hline(aes(yintercept=0.50),linetype="dashed") +
  facet_grid(consistency_fig ~ age) + 
  xlab("") + ylab("Proportion Correct") +
  coord_cartesian(ylim = c(0, 1)) + 
  theme_bw() + mytheme +
  scale_colour_manual(values = c(low_freq_colour,high_freq_colour)) +
  guides(colour=guide_legend(title='Type Frequency', override.aes=list(fill=c(low_freq_colour,high_freq_colour)))) +
  theme(legend.position='bottom') +
  ggsave("_Figures/figure3.pdf",width=4,height=6)

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

Main Statistical Analyses

Children

afcOLDchild <- droplevels(subset(afc, age == "Child" & oldnew == "old"))

afcOLDchild <- lizCenter(afcOLDchild, list("typefreq"))
## creates versions of these factors with centered coding

afcOLDchild = setContrasts(afcOLDchild, afcOLDchild$consistency, "Partial")
## adds Partial_VS_Consistent+Partial_VS_Inconsistent constrasts


afcOLDchild.lmer = glmer(correct ~  (Partial_VS_Consistent+Partial_VS_Inconsistent ) * typefreq.ct + (1 |participant), data = afcOLDchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcOLDchild.lmer, digits = 3)

## MODEL INFO:
## Observations: 720
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 835.450, BIC = 867.505
## Pseudo-R² (fixed effects) = 0.031
## Pseudo-R² (total) = 0.201 
## 
## FIXED EFFECTS:
## ---------------------------------------------------------------------------
##                                               Est.    S.E.   z val.       p
## ----------------------------------------- -------- ------- -------- -------
## (Intercept)                                  1.103   0.132    8.326   0.000
## Partial_VS_Consistent                        0.731   0.316    2.310   0.021
## Partial_VS_Inconsistent                     -0.041   0.301   -0.135   0.893
## typefreq.ct                                 -0.092   0.253   -0.364   0.716
## Partial_VS_Consistent:typefreq.ct            0.039   0.629    0.062   0.951
## Partial_VS_Inconsistent:typefreq.ct          0.186   0.602    0.310   0.757
## ---------------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.836   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      90      0.175 
## --------------------------------

# Get means for each consistency condition
summarySEwithin(data = afcOLDchild, "correct", betweenvars = "consistency", withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##    consistency   N   correct correct_norm  sd  se  ci
## 1   Consistent 240 0.8000000    0.7180556 Inf Inf Inf
## 2 Inconsistent 240 0.6708333    0.7180556 Inf Inf Inf
## 3      Partial 240 0.6833333    0.7180556 Inf Inf Inf

Adults

Tried running a model with thesame structure as for children, however there were issues with convergence. We simplify by removing the consistency x type-frequency interaction.

afcOLDadult <- droplevels(subset(afc, age == "Adult" & oldnew == "old"))

afcOLDadult <- lizCenter(afcOLDadult, list("typefreq"))
## creates versions of these factors with centered coding

afcOLDadult = setContrasts(afcOLDadult, afcOLDadult$consistency, "Partial")
## adds Partial_VS_Consistent+Partial_VS_Inconsistent constrasts

afcOLDadult.lmer = glmer(correct ~  (Partial_VS_Consistent+Partial_VS_Inconsistent) +typefreq.ct + (1 |participant), data = afcOLDadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcOLDadult.lmer, digits = 3)

## MODEL INFO:
## Observations: 480
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 199.848, BIC = 220.717
## Pseudo-R² (fixed effects) = 0.087
## Pseudo-R² (total) = 0.826 
## 
## FIXED EFFECTS:
## ---------------------------------------------------------------
##                                   Est.    S.E.   z val.       p
## ----------------------------- -------- ------- -------- -------
## (Intercept)                      6.087   1.533    3.970   0.000
## Partial_VS_Consistent            2.816   1.710    1.647   0.100
## Partial_VS_Inconsistent          1.109   1.389    0.798   0.425
## typefreq.ct                     -1.087   1.258   -0.864   0.388
## ---------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     3.742   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      60      0.810 
## --------------------------------

Comparing children and adults

An omnibus model comparing children and adults- coding as for child/adult models but with age and all interactions with age. As for adult model, we remove the interaction between consitency and type frequency

afcOLDboth <- droplevels(subset(afc, oldnew=="old"))

afcOLDboth <- lizCenter(afcOLDboth, list("typefreq", "age"))
## creates versions of these factors with centered coding

afcOLDboth = setContrasts(afcOLDboth, afcOLDboth$consistency, "Partial")
## adds Partial_VS_Consistent+Partial_VS_Inconsistent constrasts

afcOLDboth.lmerv2 = glmer(correct ~ ((Partial_VS_Consistent+Partial_VS_Inconsistent ) + typefreq.ct)*age.ct + (1 |participant), data = afcOLDboth, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcOLDboth.lmerv2, digits = 3)

## MODEL INFO:
## Observations: 1200
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 1047.447, BIC = 1093.257
## Pseudo-R² (fixed effects) = 0.257
## Pseudo-R² (total) = 0.478 
## 
## FIXED EFFECTS:
## ----------------------------------------------------------------------
##                                          Est.    S.E.   z val.       p
## ------------------------------------ -------- ------- -------- -------
## (Intercept)                             2.101   0.180   11.693   0.000
## Partial_VS_Consistent                   1.259   0.387    3.255   0.001
## Partial_VS_Inconsistent                 0.389   0.337    1.156   0.248
## typefreq.ct                            -0.389   0.294   -1.324   0.185
## age.ct                                  2.248   0.361    6.235   0.000
## Partial_VS_Consistent:age.ct            1.076   0.843    1.276   0.202
## Partial_VS_Inconsistent:age.ct          1.017   0.727    1.399   0.162
## typefreq.ct:age.ct                     -0.710   0.643   -1.104   0.270
## ----------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.181   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant     150      0.298 
## --------------------------------

Novel Nouns

As in the production task, data from the inconsistent condition were not included in the novel nouns analyses.

Plot - Figure 4

afcNEW <- droplevels(subset(afc, oldnew == "new" & consistency != "Inconsistent"))

# Aggregate to obtain means per participant/day etc.
aggregated.afcNEW = aggregate(correct ~ participant + + awareness + consistency_fig + typefreq + age, afcNEW, FUN = mean)

# Re-order levels within the age variable (the default is alphabetical: 1 = adult, 2 = child, we want to reverse this order)
aggregated.afcNEW$age = factor(aggregated.afcNEW$age, levels(aggregated.afcNEW$age)[c(2,1)])

#for plotting, setting low type frequency first
aggregated.afcNEW$typefreq_fig <- aggregated.afcNEW$typefreq
aggregated.afcNEW$typefreq_fig <- relevel(aggregated.afcNEW$typefreq_fig,ref="Low")

#Combined variable of type frequency and awareness
aggregated.afcNEW$typefreq_awareness <- factor(paste(aggregated.afcNEW$typefreq_fig,aggregated.afcNEW$awareness))


aggregated.afcNEW$colourfill <- ifelse(aggregated.afcNEW$typefreq_awareness=="High 0","transparent",
                                        ifelse(aggregated.afcNEW$typefreq_awareness=="High 1",high_freq_colour,
                                               ifelse(aggregated.afcNEW$typefreq_awareness=="Low 0","transparent",
                                                      ifelse(aggregated.afcNEW$typefreq_awareness=="Low 1",low_freq_colour,"green")))) 


colourfill <- aggregated.afcNEW$colourfill
colourfill <- colourfill[with(aggregated.afcNEW,order(typefreq_fig,consistency_fig,age,correct))]


ggplot(aggregated.afcNEW, aes(x = typefreq_fig, y = correct, colour=typefreq_fig)) +
  stat_summary(fun.y = mean, geom = "point", colour='black',shape=18,size=3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width=.4,colour='black') +
  geom_dotplot(fill=colourfill,binaxis='y',stackdir='center') +
  geom_hline(aes(yintercept=0.50),linetype="dashed") +
  facet_grid(consistency_fig ~ age) + 
  theme_bw() + mytheme +
  xlab("") + ylab("Proportion Correct") +
  coord_cartesian(ylim = c(0, 1)) + 
  scale_colour_manual(values = c(low_freq_colour,high_freq_colour)) +
  guides(colour=guide_legend(title='Type Frequency', override.aes=list(fill=c(low_freq_colour,high_freq_colour)))) +
  theme(legend.position="bottom") +
  ggsave("_Figures/figure4.pdf",width=4,height=6)

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

Statistical Analyses

Children

Run model- all factors have two levels and are given centered numeric coding.

afcNEWchild <- droplevels(subset(afc, age == "Child" & oldnew == "new" & consistency != "Inconsistent"))
afcNEWchild$consistency <-relevel(afcNEWchild$consistency, ref="Partial")
afcNEWchild$typefreq <- relevel(afcNEWchild$typefreq,ref="Low")

afcNEWchild <- lizCenter(afcNEWchild, list("consistency", "typefreq"))
## creates versions of these factors with centered coding

afcNEWchild.lmer1 = glmer(correct ~ + consistency.ct * typefreq.ct + (1|participant),    data = afcNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcNEWchild.lmer1, digits = 3)

## MODEL INFO:
## Observations: 480
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 585.047, BIC = 605.916
## Pseudo-R² (fixed effects) = 0.085
## Pseudo-R² (total) = 0.357 
## 
## FIXED EFFECTS:
## -----------------------------------------------------------------
##                                     Est.    S.E.   z val.       p
## -------------------------------- ------- ------- -------- -------
## (Intercept)                        0.676   0.194    3.484   0.000
## consistency.ct                     1.252   0.386    3.241   0.001
## typefreq.ct                        0.381   0.380    1.004   0.315
## consistency.ct:typefreq.ct         0.366   0.759    0.482   0.630
## -----------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.180   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      60      0.297 
## --------------------------------

# Get means for each consistency condition
summarySEwithin(data = afcNEWchild, "correct", betweenvars = "consistency", withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency   N   correct correct_norm  sd  se  ci
## 1  Consistent 240 0.7208333    0.6145833 Inf Inf Inf
## 2     Partial 240 0.5083333    0.6145833 Inf Inf Inf

Is Performance Above Chance for each condition separately?

# Re run model removing intercept and main effect of consistency and replacing with intercept for each of consistent and partial separately  
afcNEWchild.lmer2 = glmer(correct ~ + consistency + consistency.ct * typefreq.ct - 1      - consistency.ct + (1|participant), data = afcNEWchild, family = binomial, control = 

glmerControl(optimizer = "bobyqa"))
anova(afcNEWchild.lmer1, afcNEWchild.lmer2)

## Data: afcNEWchild
## Models:
## afcNEWchild.lmer1: correct ~ +consistency.ct * typefreq.ct + (1 | participant)
## afcNEWchild.lmer2: correct ~ +consistency + consistency.ct * typefreq.ct - 1 - consistency.ct + 
## afcNEWchild.lmer2:     (1 | participant)
##                   Df    AIC    BIC  logLik deviance Chisq Chi Df
## afcNEWchild.lmer1  5 585.05 605.92 -287.52   575.05             
## afcNEWchild.lmer2  5 585.05 605.92 -287.52   575.05     0      0
##                   Pr(>Chisq)    
## afcNEWchild.lmer1               
## afcNEWchild.lmer2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

get_coeffs(afcNEWchild.lmer2, c("consistencyConsistent", "consistencyPartial"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
consistencyConsistent	1.302	0.288	4.526	0.000
consistencyPartial	0.050	0.259	0.195	0.846

Adults

It was not possible to fit the full model (correlation of fixed factors = 1), therefore, as with trained nouns, we removed the interaction between semantic consistency and type frequency.

afcNEWadult <- droplevels(subset(afc, age == "Adult" & oldnew == "new" & consistency != "Inconsistent"))
afcNEWadult$consistency <-relevel(afcNEWadult$consistency, ref="Partial")

afcNEWadult <- lizCenter(afcNEWadult, list("consistency", "typefreq"))
## creates versions of these factors with centered coding

afcNEWadult.lmer1 = glmer(correct ~ + consistency.ct + typefreq.ct + (1|participant), data = afcNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcNEWadult.lmer1, digits = 3)

## MODEL INFO:
## Observations: 320
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 174.119, BIC = 189.193
## Pseudo-R² (fixed effects) = 0.285
## Pseudo-R² (total) = 0.766 
## 
## FIXED EFFECTS:
## ------------------------------------------------------
##                          Est.    S.E.   z val.       p
## -------------------- -------- ------- -------- -------
## (Intercept)             4.566   0.979    4.664   0.000
## consistency.ct          3.918   1.394    2.810   0.005
## typefreq.ct            -0.819   1.198   -0.684   0.494
## ------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     2.601   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      40      0.673 
## --------------------------------

# Get means for each consistency condition
summarySEwithin(data = afcNEWadult, "correct", betweenvars = "consistency", withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency   N correct correct_norm  sd  se  ci
## 1  Consistent 160  0.9750      0.88125 Inf Inf Inf
## 2     Partial 160  0.7875      0.88125 Inf Inf Inf

Is Performance Above Chance in each condition separately?

# Rerun the modle with the intercept and main effect of consistency removed and a separate intercept for each of consistent and partial conditions 
afcNEWadult.lmer2 = glmer(correct ~ + consistency + consistency.ct + typefreq.ct - 1  - consistency.ct + (1|participant), data = afcNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#check it is the same model
anova(afcNEWadult.lmer1, afcNEWadult.lmer2)

## Data: afcNEWadult
## Models:
## afcNEWadult.lmer1: correct ~ +consistency.ct + typefreq.ct + (1 | participant)
## afcNEWadult.lmer2: correct ~ +consistency + consistency.ct + typefreq.ct - 1 - consistency.ct + 
## afcNEWadult.lmer2:     (1 | participant)
##                   Df    AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## afcNEWadult.lmer1  4 174.12 189.19 -83.06   166.12                        
## afcNEWadult.lmer2  4 174.12 189.19 -83.06   166.12     0      0  < 2.2e-16
##                      
## afcNEWadult.lmer1    
## afcNEWadult.lmer2 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# print out the intercepts for each of cosnsistent and partial conditions
get_coeffs(afcNEWadult.lmer2, c("consistencyConsistent", "consistencyPartial"))

	Estimate	Std. Error	z value	Pr(>\|z\|)
consistencyConsistent	6.525	1.469	4.441	0.000
consistencyPartial	2.607	0.855	3.051	0.002

Comparing children and adults

afcNEWboth <- droplevels(subset(afc, oldnew=="new" & consistency != "Inconsistent"))
afcNEWboth$consistency <-relevel(afcNEWboth$consistency, ref="Partial")

afcNEWboth <- lizCenter(afcNEWboth, list("consistency", "typefreq","age"))
## creates versions of these factors with centered coding

afcNEWboth.lmer1 = glmer(correct ~  + age.ct * consistency.ct * typefreq.ct - (consistency.ct : typefreq.ct) + (1|participant), data = afcNEWboth, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcNEWboth.lmer1, digits = 3)

## MODEL INFO:
## Observations: 800
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 761.500, BIC = 798.977
## Pseudo-R² (fixed effects) = 0.394
## Pseudo-R² (total) = 0.626 
## 
## FIXED EFFECTS:
## -------------------------------------------------------------------------
##                                             Est.    S.E.   z val.       p
## --------------------------------------- -------- ------- -------- -------
## (Intercept)                                1.862   0.260    7.165   0.000
## age.ct                                     2.807   0.549    5.112   0.000
## consistency.ct                             2.140   0.482    4.444   0.000
## typefreq.ct                               -0.165   0.426   -0.388   0.698
## age.ct:consistency.ct                      1.941   1.049    1.851   0.064
## age.ct:typefreq.ct                        -1.472   0.973   -1.512   0.130
## age.ct:consistency.ct:typefreq.ct         -1.980   1.872   -1.058   0.290
## -------------------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     1.429   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant     100      0.383 
## --------------------------------

#print means
summarySEwithin(data = afcNEWboth, "correct", betweenvars = c("consistency","age"), withinvars = NULL, idvar = "participant", na.rm = FALSE, conf.interval = 0.95, .drop = TRUE)

##   consistency   age   N   correct correct_norm  sd  se  ci
## 1  Consistent Adult 160 0.9750000      0.72125 Inf Inf Inf
## 2  Consistent Child 240 0.7208333      0.72125 Inf Inf Inf
## 3     Partial Adult 160 0.7875000      0.72125 Inf Inf Inf
## 4     Partial Child 240 0.5083333      0.72125 Inf Inf Inf

Awareness (Questionnaire)

Data Preparation (Production & 2AFC)

We need a column coding whether the participant is aware/unaware of the semantic conditioning. This is coded in the column “awareness” in the datasheet. However the coding for inconsistent is inappropriate as there are no semantic patterns in the data (NB - the questionnaire was adminsiterd to all participants and coded blind to condition) We therefore re-code this as “na”.

prod1$awareness2 = as.character(prod1$awareness)
prod1$awareness2[prod1$consistency == "Inconsistent"] = "na"
prod1$awareness2 = factor(prod1$awareness2)

afc$awareness2 = as.character(afc$awareness)
afc$awareness2[afc$consistency == "Inconsistent"] = "na"
afc$awareness2 = factor(afc$awareness2)

Relationship between awareness and age & condition

Numbers aware/unaware in each cell- Table 4

# note - sanity check  that the afc and production dataframes give the same information about the  numbers of particiants who are aware/ unaware in each conditon

ddply(subset(prod1,consistency!="Inconsistent"),age~consistency+typefreq+awareness,plyr::summarise,Current_N=length(unique(participant)))== 
ddply(subset(afc,consistency!="Inconsistent"),age~consistency+typefreq+awareness,plyr::summarise,Current_N=length(unique(participant)))

##        age consistency typefreq awareness Current_N
##  [1,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [2,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [3,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [4,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [5,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [6,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [7,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [8,] TRUE        TRUE     TRUE      TRUE      TRUE
##  [9,] TRUE        TRUE     TRUE      TRUE      TRUE
## [10,] TRUE        TRUE     TRUE      TRUE      TRUE
## [11,] TRUE        TRUE     TRUE      TRUE      TRUE
## [12,] TRUE        TRUE     TRUE      TRUE      TRUE
## [13,] TRUE        TRUE     TRUE      TRUE      TRUE
## [14,] TRUE        TRUE     TRUE      TRUE      TRUE

plyr::ddply(subset(afc,consistency!="Inconsistent"),age~consistency+typefreq+awareness,plyr::summarise,Current_N=length(unique(participant)))

##      age consistency typefreq awareness Current_N
## 1  Child  Consistent      Low         0        10
## 2  Child  Consistent      Low         1         5
## 3  Child  Consistent     High         0         7
## 4  Child  Consistent     High         1         8
## 5  Child     Partial      Low         0        15
## 6  Child     Partial     High         0        13
## 7  Child     Partial     High         1         2
## 8  Adult  Consistent      Low         1        10
## 9  Adult  Consistent     High         0         1
## 10 Adult  Consistent     High         1         9
## 11 Adult     Partial      Low         0         5
## 12 Adult     Partial      Low         1         5
## 13 Adult     Partial     High         0         5
## 14 Adult     Partial     High         1         5

Chi Square analyses

By consistency, collapsing over type freq.

aggregated.afc = aggregate(correct ~ participant + consistency + age + oldnew + awareness2 + typefreq + exception, data = afc, FUN = mean)

adults.aware = with(droplevels(subset(aggregated.afc, age == "Adult" & consistency != "Inconsistent" & oldnew == "new")), table(consistency, awareness2))
adults.aware

##             awareness2
## consistency   0  1
##   Consistent  1 19
##   Partial    10 10

chisq.test(adults.aware)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  adults.aware
## X-squared = 8.0251, df = 1, p-value = 0.004613

child.aware = with(droplevels(subset(aggregated.afc, age == "Child" & consistency != "Inconsistent" & oldnew == "new")), table(consistency, awareness2))
child.aware

##             awareness2
## consistency   0  1
##   Consistent 17 13
##   Partial    28  2

chisq.test(child.aware)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  child.aware
## X-squared = 8.8889, df = 1, p-value = 0.002869

By type freq, collapsing over consistency.

adults.aware = with(droplevels(subset(aggregated.afc, age == "Adult" & consistency != "Inconsistent" & oldnew == "new")), table(typefreq, awareness2))
adults.aware

##         awareness2
## typefreq  0  1
##     Low   5 15
##     High  6 14

chisq.test(adults.aware)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  adults.aware
## X-squared = 0, df = 1, p-value = 1

child.aware = with(droplevels(subset(aggregated.afc, age == "Child" & consistency != "Inconsistent" & oldnew == "new")), table(typefreq, awareness2))
child.aware

##         awareness2
## typefreq  0  1
##     Low  25  5
##     High 20 10

chisq.test(child.aware)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  child.aware
## X-squared = 1.4222, df = 1, p-value = 0.233

Production Test, Novel nouns, unaware only

Children, Fully Consistent Condition

Run a model with the same structure as previously except that just one condition data here - consistent- (since we didn’t see group level learning in the partial condition) meaning no contrasts for condition in the model; only running the model over the subset of data coded as “unaware”

prodNEWchild.con.unaware = droplevels(subset(prod1, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == "0"))
# remove all the "aware" participants from the analysis 

prodNEWchild.con.unaware = lizCenter(prodNEWchild.con.unaware, list("typefreq", "session"))
## creates versions of these factors with centered coding

prodNEWchild.con.unaware.lmer = glmer(det_correct ~ + typefreq.ct * session.ct + (session.ct|participant),  data = prodNEWchild.con.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodNEWchild.con.unaware.lmer, digits = 3)

## MODEL INFO:
## Observations: 898
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 1239.595, BIC = 1273.196
## Pseudo-R² (fixed effects) = 0.002
## Pseudo-R² (total) = 0.054 
## 
## FIXED EFFECTS:
## --------------------------------------------------------------
##                                  Est.    S.E.   z val.       p
## ---------------------------- -------- ------- -------- -------
## (Intercept)                     0.062   0.125    0.501   0.616
## typefreq.ct                    -0.139   0.251   -0.553   0.580
## session.ct                      0.118   0.140    0.838   0.402
## typefreq.ct:session.ct          0.031   0.282    0.110   0.912
## --------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.418   
##  participant   session.ct      0.117   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      17      0.050 
## --------------------------------

Adults, Partially Consistent Condition

Run a model with the same structure as previously except that just one condition data here - partial- (since there was only one unaware adult in the consistent condition) meaning no contrasts for condition in the model; only running the model over the subset of data coded as “unaware”

prodNEWadult.par.unaware <- droplevels(subset(prod1, age == "Adult" & oldnew == "new" & consistency == "Partial" & awareness2 == "0"))
# remove all the "aware" participants from the analysis 

prodNEWadult.par.unaware = lizCenter(prodNEWadult.par.unaware, list("typefreq", "session"))
## creates versions of these factors with centered coding

prodNEWadult.par.unaware.lmer = glmer(det_correct ~ + typefreq.ct*session.ct + (session.ct|participant), data = prodNEWadult.par.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(prodNEWadult.par.unaware.lmer, digits = 3)

## MODEL INFO:
## Observations: 635
## Dependent Variable: det_correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 816.133, BIC = 847.308
## Pseudo-R² (fixed effects) = 0.006
## Pseudo-R² (total) = 0.221 
## 
## FIXED EFFECTS:
## --------------------------------------------------------------
##                                  Est.    S.E.   z val.       p
## ---------------------------- -------- ------- -------- -------
## (Intercept)                     0.346   0.199    1.741   0.082
## typefreq.ct                    -0.124   0.396   -0.313   0.754
## session.ct                     -0.021   0.523   -0.040   0.968
## typefreq.ct:session.ct         -0.566   1.045   -0.542   0.588
## --------------------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.556   
##  participant   session.ct      1.549   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      10      0.086 
## --------------------------------

2AFC Test, Novel nouns, unaware only

Children, Fully Consistent Condition

afcNEWchild.con.unaware <- droplevels(subset(afc, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == "0"))
# remove all the "aware" participants from the analysis 

afcNEWchild.con.unaware = lizCenter(afcNEWchild.con.unaware, list("typefreq"))
## creates versions of these factors with centered coding


afcNEWchild.con.unaware.lmer = glmer(correct ~  + typefreq.ct + (1|participant), data = afcNEWchild.con.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcNEWchild.con.unaware.lmer, digits = 3)

## MODEL INFO:
## Observations: 136
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 192.518, BIC = 201.256
## Pseudo-R² (fixed effects) = 0.001
## Pseudo-R² (total) = 0.083 
## 
## FIXED EFFECTS:
## --------------------------------------------------
##                      Est.    S.E.   z val.       p
## ----------------- ------- ------- -------- -------
## (Intercept)         0.096   0.221    0.435   0.663
## typefreq.ct         0.098   0.449    0.218   0.827
## --------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.542   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      17      0.082 
## --------------------------------

Adults, Partially Consistent Condition

afcNEWadult.par.unaware <- droplevels(subset(afc, age == "Adult" & oldnew == "new" & consistency == "Partial" & awareness2 == "0"))
# remove all the "aware" participants from the analysis 

afcNEWadult.par.unaware = lizCenter(afcNEWadult.par.unaware, list("typefreq"))
## creates versions of these factors with centered coding

afcNEWadult.par.unaware.lmer = glmer(correct ~ + typefreq.ct + (1|participant), data = afcNEWadult.par.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

jtools::summ(afcNEWadult.par.unaware.lmer, digits = 3)

## MODEL INFO:
## Observations: 80
## Dependent Variable: correct
## Type: Mixed effects generalized linear regression
## Error Distribution: binomial
## Link function: logit 
## 
## MODEL FIT:
## AIC = 114.892, BIC = 122.038
## Pseudo-R² (fixed effects) = 0.003
## Pseudo-R² (total) = 0.003 
## 
## FIXED EFFECTS:
## ---------------------------------------------------
##                       Est.    S.E.   z val.       p
## ----------------- -------- ------- -------- -------
## (Intercept)          0.303   0.226    1.338   0.181
## typefreq.ct         -0.205   0.453   -0.452   0.651
## ---------------------------------------------------
## 
## RANDOM EFFECTS:
## ---------------------------------------
##     Group       Parameter    Std. Dev. 
## ------------- ------------- -----------
##  participant   (Intercept)     0.000   
## ---------------------------------------
## 
## Grouping variables:
## --------------------------------
##     Group      # groups    ICC  
## ------------- ---------- -------
##  participant      10      0.000 
## --------------------------------

Bayes Factor analysis

Children’s ability to generalise in the partial condition

QUESTION: Didn’t find evidence that children could generalise in the partial condition. Do we have evidnece for the null?

We first fit an glmer and then use the values from this in the BF analysis. Here we fit a model with both the consitent and partial conditions- equivalent to that run in main analyses but with a separate intercept for each consitency condition, since we use the estimate for the intercept for the consistent condition as a maximum estimate of what we might expect in the partial condition.

Production:

prodNEWchild <- droplevels(subset(prod1, age == "Child" & oldnew == "new" & consistency != "Inconsistent"))
# get the right data

#run glmer
prodNEWchild = lizCenter(prodNEWchild, c("typefreq", "session","consistency"))
prodNEWchild.lmer.forBF2 = glmer(det_correct ~
                                       
                           + consistency
                           + (consistency.ct * typefreq.ct * session.ct)
                           -1 - consistency.ct
                           + (1 + session.ct|participant),
                           data = prodNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(prodNEWchild.lmer.forBF2)$coeff,3)

##                                       Estimate Std. Error z value Pr(>|z|)
## consistencyConsistent                    0.700      0.152   4.614    0.000
## consistencyPartial                       0.037      0.146   0.254    0.800
## typefreq.ct                              0.135      0.209   0.644    0.519
## session.ct                               0.388      0.093   4.146    0.000
## consistency.ct:typefreq.ct               0.377      0.419   0.900    0.368
## consistency.ct:session.ct               -0.504      0.186  -2.706    0.007
## typefreq.ct:session.ct                   0.127      0.182   0.700    0.484
## consistency.ct:typefreq.ct:session.ct   -0.234      0.365  -0.641    0.522

#compute BF using estimate and SE for the intercept for partial as the model of the data and HALF estiamte for the intercept forconsistent as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)
std.Error = summary(prodNEWchild.lmer.forBF2)$coeff["consistencyPartial", "Std. Error" ]
estimate = summary(prodNEWchild.lmer.forBF2)$coeff["consistencyPartial", "Estimate" ]
h1_sd = summary(prodNEWchild.lmer.forBF2)$coeff["consistencyConsistent", "Estimate"]/2
tails = 1

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.1455831

estimate

## [1] 0.03692002

h1_sd

## [1] 0.3498475

## [1] 0.4699651

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table          to_table
## 1     subst_null                  0                 0
## 2          ambig 0.0464153520215615 0.510568872237177
## 3     subst_null  0.556984224258738  4.59511985013459

# estimate how many participants we might need to get evidence for the null
power_calc = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=30, min= 30, max = 200 )

Bf_rangetable(power_calc)

##   category_table from_table to_table
## 1          ambig         30      100
## 2     subst_null        101      200

Could have evidence for the null with aprox 100 (70 more) participants. (Note - not included in paper but is in letter to editor)

AFC

afcNEWchild <- droplevels(subset(afc, age == "Child" & oldnew == "new" & consistency != "Inconsistent"))
# get the right data

# run glmer
afcNEWchild = lizCenter(afcNEWchild, c("typefreq", "consistency"))
afcNEWchild.lmer.forBF2 = glmer(correct ~
                                       
                           + consistency
                           + (consistency.ct * typefreq.ct)
                           -1 - consistency.ct
                           + (1 |participant),
                           data = afcNEWchild, family = binomial, control = glmerControl(optimizer = "bobyqa"))
round(summary(afcNEWchild.lmer.forBF2)$coeff,3)

##                            Estimate Std. Error z value Pr(>|z|)
## consistencyConsistent         1.302      0.288   4.526    0.000
## consistencyPartial            0.050      0.259   0.195    0.846
## typefreq.ct                   0.381      0.380   1.004    0.315
## consistency.ct:typefreq.ct   -0.366      0.759  -0.482    0.630

# view coefficients
std.Error = summary(afcNEWchild.lmer.forBF2)$coeff["consistencyPartial", "Std. Error" ]

#compute BF using estimate and SE for the intercept for partial as the model of the data and HALF estiamte for the intercept forconsistent as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)
estimate = summary(afcNEWchild.lmer.forBF2)$coeff["consistencyPartial", "Estimate" ]
h1_sd = summary(afcNEWchild.lmer.forBF2)$coeff["consistencyConsistent", "Estimate"]/2
tails = 1


B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.2592327

estimate

## [1] 0.05046282

h1_sd

## [1] 0.6510196

## [1] 0.428017

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table          to_table
## 1     subst_null                  0                 0
## 2          ambig 0.0464153520215615 0.835476336388107
## 3     subst_null  0.881891688409669  4.59511985013459

# estimate how many participants we might need to get evidence for the null
power_calc = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=30, min= 30, max = 200 )

Bf_rangetable(power_calc)

##   category_table from_table to_table
## 1          ambig         30       62
## 2     subst_null         63      200

Type Frequency effect

Question: We didn’t see evidence for a TF effect anywhere in the data. Do we have evidence for the null somewhere in this data?

We look at novel nouns since this is where hypothesis that HTF>LTF is clear

Children, consistent condition

We focus on consistent condition since this was the condition where we saw learning overall.

First run a model equivalent to those run for main analsyes but just with consistent condition data.

Then we compute a BF using values from this model. We also compute robustness regions and since evidence is ambigious conduct analyses to see how many more participants we might need to get evidence for H0/h1 (based on the assumption that SE reduces with sqrt(N))

Production:

prodNEWchild.cons <- droplevels(subset(prod1, age == "Child" & oldnew == "new" & consistency == "Consistent"))
# get the right data

#run glmer model
prodNEWchild.cons = lizCenter(prodNEWchild.cons, c("typefreq", "session"))
## creates versions of these factors with centered coding

prodNEWchild.lmer.forBF = glmer(det_correct ~
                                 + (typefreq.ct * session.ct)
                                 + (1 + session.ct|participant),
                                 data = prodNEWchild.cons, family = binomial, control = 
                                   glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(prodNEWchild.lmer.forBF)$coeff,3)

##                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)               0.770      0.221   3.477    0.001
## typefreq.ct              -0.070      0.439  -0.159    0.874
## session.ct                0.768      0.197   3.904    0.000
## typefreq.ct:session.ct    0.217      0.371   0.586    0.558

#compute BF using estimate and SE for main effect of type frequency as model of the data and estimate of the intercept as the sd of the half normal which is the model of H1
std.Error = summary(prodNEWchild.lmer.forBF)$coeff["typefreq.ct", "Std. Error" ]
estimate = summary(prodNEWchild.lmer.forBF)$coeff["typefreq.ct", "Estimate" ]
h1_sd = summary(prodNEWchild.lmer.forBF)$coeff["(Intercept)", "Estimate"]
#
B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.43936

estimate

## [1] -0.06982416

h1_sd

## [1] 0.7698656

## [1] 0.4474644

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this
#
sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 1.06755309649591
## 3     subst_null   1.11396844851748 4.59511985013459

# estimate how many participants we might need to get evidence for the null
power_calc = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=30, min= 30, max = 200 )

Bf_rangetable(power_calc)

##   category_table from_table to_table
## 1          ambig         30       55
## 2     subst_null         56      200

Estimate could have evidence for the null with approximately 26 more participants. (Note - not included in paper but is in letter to editor)

AFC:

afcNEWchild.cons <- droplevels(subset(afc, age == "Child" & oldnew == "new" & consistency == "Consistent"))

# get the right data

# run glmer model
afcNEWchild.cons = lizCenter(afcNEWchild.cons, c("typefreq"))
afcNEWchild.lmer.forBF = glmer(correct ~
                                 
                                 + typefreq.ct 
                                 + (1|participant),
                                 data = afcNEWchild.cons, family = binomial, control = 
                                   glmerControl(optimizer = "bobyqa"))
#view coefficients
round(summary(afcNEWchild.lmer.forBF)$coeff,3)

##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    1.578      0.436   3.620     0.00
## typefreq.ct    0.618      0.782   0.789     0.43

#compute BF using estimate and SE for main effect of type frequency as model of the data and estimate of the intercept as the sd of the half normal which is the model of H1
std.Error = summary(afcNEWchild.lmer.forBF)$coeff["typefreq.ct", "Std. Error" ]
estimate = summary(afcNEWchild.lmer.forBF)$coeff["typefreq.ct", "Estimate" ]
h1_sd = summary(afcNEWchild.lmer.forBF)$coeff["(Intercept)", "Estimate"]
tails = 1


B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.7823647

estimate

## [1] 0.6175495

h1_sd

## [1] 1.577875

## [1] 0.8693503

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 4.59511985013459

# estimate how many participants we might need to get evidence for H1

powerrange = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=30, min= 30, max = 250 )


Bf_rangetable(powerrange)

##   category_table from_table to_table
## 1          ambig         30      211
## 2       subst_H1        212      250

Estimate would need over 200 participants to get evidence for the null. (Note - not included in paper but is in letter to editor)

Adults, consistent + partial conditions

We look at data from both partial and consistent conditions since we saw overall learning in both of these.

First run a model equivalent to those run for main analsyes but just with consistent condition data.

Then we compute a BF using values from this model. We also compute robustness regions.

prodNEWadult <- droplevels(subset(prod1, age == "Adult" & oldnew == "new" & consistency != "Inconsistent"))
# get the right data

#run glmer model
prodNEWadult$consistency = relevel(prodNEWadult$consistency, ref = "Partial")
prodNEWadult <- lizCenter(prodNEWadult, list("consistency", "typefreq", "session"))
prodNEWadult.lmer.forBF = glmer(det_correct ~
                                   
                                   + (consistency.ct * typefreq.ct * session.ct)
                                 + (1 + session.ct|participant),
                                 data = prodNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(prodNEWadult.lmer.forBF)$coeff,3)

##                                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              4.311      0.634   6.801    0.000
## consistency.ct                           3.443      1.030   3.342    0.001
## typefreq.ct                             -0.277      1.010  -0.275    0.784
## session.ct                               2.216      1.023   2.165    0.030
## consistency.ct:typefreq.ct              -1.622      1.987  -0.816    0.414
## consistency.ct:session.ct                1.892      1.429   1.323    0.186
## typefreq.ct:session.ct                   1.619      1.352   1.198    0.231
## consistency.ct:typefreq.ct:session.ct    3.926      2.606   1.506    0.132

#compute BF using estimate and SE for main effect of type frequency as model of the data and estimate of the intercept as the sd of the half normal which is the model of H1

std.Error = summary(prodNEWadult.lmer.forBF)$coeff["typefreq.ct", "Std. Error" ]
estimate = summary(prodNEWadult.lmer.forBF)$coeff["typefreq.ct", "Estimate" ]
h1_sd = summary(prodNEWadult.lmer.forBF)$coeff["(Intercept)", "Estimate"]
tails = 1


B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 1.009707

estimate

## [1] -0.2771888

h1_sd

## [1] 4.310967

## [1] 0.1885272

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 2.27435224905651
## 3     subst_null   2.32076760107808 4.59511985013459

2AFC: (note - lmer model has interaction term removed as in main analyses)

afcNEWadult <- droplevels(subset(afc, age == "Adult" & oldnew == "new" & consistency != "Inconsistent"))
# get the right data

#run glmer model
afcNEWadult$consistency <-relevel(afcNEWadult$consistency, ref="Partial")
afcNEWadult <- lizCenter(afcNEWadult, list("consistency", "typefreq"))
afcNEWadult.lmer.forBF = glmer(correct ~
                          + consistency.ct + typefreq.ct
                          + (1|participant),
                          data = afcNEWadult, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
summary(afcNEWadult.lmer.forBF)$coeff

##                  Estimate Std. Error   z value     Pr(>|z|)
## (Intercept)     4.5661460  0.9789287  4.664432 3.094711e-06
## consistency.ct  3.9175043  1.3940862  2.810088 4.952803e-03
## typefreq.ct    -0.8193968  1.1982761 -0.683813 4.940932e-01

#compute BF using estimate and SE for main effect of type frequency as model of the data and estimate of the intercept as the sd of the half normal which is the model of H1
std.Error = summary(afcNEWadult.lmer.forBF)$coeff["typefreq.ct", "Std. Error" ]
estimate = summary(afcNEWadult.lmer.forBF)$coeff["typefreq.ct", "Estimate" ]
h1_sd = summary(afcNEWadult.lmer.forBF)$coeff["(Intercept)", "Estimate"]
tails = 1

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 1.198276

estimate

## [1] -0.8193968

h1_sd

## [1] 4.566146

## [1] 0.162582

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 2.04227548894871
## 3     subst_null   2.08869084097027 4.59511985013459

Unaware participants and generalization

Question: We saw above that even where there is group level generalisation, unaware participants don’t generalize. Do we have evidence for the null?

Children, consistent condition

We first fit an glmer and then use the values from this in the BF analysis. To get the value to inform H1, want to base it on the mean (intercept) for the aware participants, using this as a maximum. However can’t fit a model with intercept for aware participants (I think due to the fact there are some cells at ceiling) so instead take the log odds of the mean for awareparticipants directly.

We also compute robustness regions.

Production

prodNEWchild.con.unaware = droplevels(subset(prod1, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == 0 ))
# get the right data

#run glmer
prodNEWchild.con.unaware = lizCenter(prodNEWchild.con.unaware, list("typefreq", "session"))

prodNEWchild.con.unaware.lmer.forBF = glmer(det_correct ~
                             +typefreq.ct*session.ct
                              + (1|participant),
                              data = prodNEWchild.con.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(prodNEWchild.con.unaware.lmer.forBF)$coeff,3)

##                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)               0.059      0.123   0.484    0.629
## typefreq.ct              -0.135      0.248  -0.547    0.585
## session.ct                0.115      0.137   0.839    0.401
## typefreq.ct:session.ct    0.035      0.275   0.129    0.898

#compute BF using estimate and SE for the intercept for unaware participants as the model of the data and HALF estiamte for the intercept for aware participants as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)

std.Error = summary(prodNEWchild.con.unaware.lmer.forBF)$coeff["(Intercept)", "Std. Error" ]
estimate = summary(prodNEWchild.con.unaware.lmer.forBF)$coeff["(Intercept)", "Estimate" ]

h1_sd = logodds(mean(subset(prod1, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == 1 )$det_correct))/2
# note- computed directly as couldn't fit  model with intercept for aware participants

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.1229456

estimate

## [1] 0.05944611

h1_sd

## [1] 0.642379

## [1] 0.2852503

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table          to_table
## 1     subst_null                  0                 0
## 2          ambig 0.0464153520215615 0.510568872237177
## 3     subst_null  0.556984224258738  4.59511985013459

2AFC

afcNEWchild.con.unaware = droplevels(subset(afc, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == 0 ))
# get the right data


afcNEWchild.con.unaware = lizCenter(afcNEWchild.con.unaware, list("typefreq"))

afcNEWchild.con.unaware.lmer.forBF = glmer(correct ~
                             +typefreq.ct
                              + (1|participant),
                              data = afcNEWchild.con.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(afcNEWchild.con.unaware.lmer.forBF)$coeff,3)

##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    0.096      0.221   0.435    0.663
## typefreq.ct    0.098      0.449   0.218    0.827

#compute BF using estimate and SE for the intercept for unaware participants as the model of the data and HALF estiamte for the intercept for aware participants as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)

std.Error = summary(afcNEWchild.con.unaware.lmer.forBF)$coeff["(Intercept)", "Std. Error" ]
estimate = summary(afcNEWchild.con.unaware.lmer.forBF)$coeff["(Intercept)", "Estimate" ]
h1_sd = logodds(mean(subset(afc, age == "Child" & oldnew == "new" & consistency == "Consistent" & awareness2 == 1 )$correct))/2

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.2213363

estimate

## [1] 0.09639177

h1_sd

## [1] 1.965913

## [1] 0.1619984

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 0.92830704043123
## 3     subst_null  0.974722392452792 4.59511985013459

Adults, partial condition

We also compute robustness regions. and since evidence is ambigious conduct analyses to see how many more participants we might need to get evidence for H0/h1 (based on the assumption that SE reduces with sqrt(N))

prodNEWadult.partial.unaware = droplevels(subset(prod1, age == "Adult" & oldnew == "new" & consistency == "Partial" & awareness2 == 0 ))
# get the right data

#run glmer
prodNEWadult.partial.unaware = lizCenter(prodNEWadult.partial.unaware, list("typefreq", "session"))

prodNEWadult.partial.unaware.lmer.forBF = glmer(det_correct ~
                             +typefreq.ct*session.ct
                              + (1|participant),
                              data = prodNEWadult.partial.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(prodNEWadult.partial.unaware.lmer.forBF)$coeff,3)

##                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)               0.265      0.156   1.699    0.089
## typefreq.ct              -0.223      0.312  -0.715    0.474
## session.ct                0.153      0.164   0.931    0.352
## typefreq.ct:session.ct   -0.403      0.328  -1.231    0.218

#compute BF using estimate and SE for the intercept for unaware participants as the model of the data and HALF estiamte for the intercept for aware participants as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)

std.Error = summary(prodNEWadult.partial.unaware.lmer.forBF)$coeff["(Intercept)", "Std. Error" ]
estimate = summary(prodNEWadult.partial.unaware.lmer.forBF)$coeff["(Intercept)", "Estimate" ]

h1_sd = logodds(mean(subset(prod1, age == "Adult" & oldnew == "new" & consistency == "Partial" & awareness2 == 1 )$det_correct))/2
# note- computed directly as couldn't fit  model with intercept for aware participants

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.1560646

estimate

## [1] 0.2651569

h1_sd

## [1] 1.666102

## [1] 0.7426316

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615 3.75964351374648
## 3     subst_null   3.80605886576804 4.59511985013459

# estimate how many participants we might need to get evidence for the null
power_calc = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=10, min= 10, max = 200 )

Bf_rangetable(power_calc)

##   category_table from_table to_table
## 1          ambig         10       22
## 2       subst_H1         23      200

Estimate could have evidence for the null with approximately 13 more participants. (But note would have to test a lot more to find more “unaware” participants). (Note - not included in paper but is in letter to editor)

# get the right data
afcNEWadult.partial.unaware = droplevels(subset(afc, age == "Adult" & oldnew == "new" & consistency == "Partial" & awareness2 == 0 ))

afcNEWadult.partial.unaware = lizCenter(afcNEWadult.partial.unaware, list("typefreq"))

#run glmer
afcNEWadult.partial.unaware.lmer.forBF = glmer(correct ~
                             +typefreq.ct
                              + (1|participant),
                              data = afcNEWadult.partial.unaware, family = binomial, control = glmerControl(optimizer = "bobyqa"))

#view coefficients
round(summary(afcNEWadult.partial.unaware.lmer.forBF)$coeff,3)

##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    0.303      0.226   1.338    0.181
## typefreq.ct   -0.205      0.453  -0.452    0.651

#compute BF using estimate and SE for the intercept for unaware participants as the model of the data and HALF estiamte for the intercept for aware participants as sd of the half normal which is the model of H1 (since this consistutes a maximum level of generalisation  we might expect)

std.Error = summary(afcNEWadult.partial.unaware.lmer.forBF)$coeff["(Intercept)", "Std. Error" ]
estimate = summary(afcNEWadult.partial.unaware.lmer.forBF)$coeff["(Intercept)", "Estimate" ]

h1_sd = logodds(.99)/2
# note- computed directly as couldn't fit  model with intercept for aware participants

B = Bf(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1)$BayesFactor

std.Error

## [1] 0.2264821

estimate

## [1] 0.3030679

h1_sd

## [1] 2.29756

## [1] 0.4346114

# print table that shows range of values of h1_sd that would provide subst evidence of H1 or H0 or ambiguous evidence.We can read the robustness regions from this

sdtheoryrange = seq(from = 0, to =logodds(.99), length.out = 100)
range = Bf_range(sd=std.Error, obtained=estimate, meanoftheory=0, sdtheoryrange, tail=1)
Bf_rangetable(range)

##   category_table         from_table         to_table
## 1     subst_null                  0                0
## 2          ambig 0.0464153520215615  3.0169978814015
## 3     subst_null   3.06341323342306 4.59511985013459

# estimate how many participants we might need to get evidence for the null
power_calc = Bf_powercalc(sd=std.Error, uniform = NULL, obtained=estimate, meanoftheory = 0, sdtheory = h1_sd, tail = 1, N=10, min= 30, max = 200 )

Bf_rangetable(power_calc)

##   category_table from_table to_table
## 1          ambig         30       38
## 2       subst_H1         39      200

Estimate could have evidence for the null with approximately 28 more participants. (But note would have to test a lot more to find more “unaware” participants). (Note - not included in paper but is in letter to editor)