Load packages and helper functions

Packages

library(psych)
library(plyr)
library(doBy)
library(cowplot)
library(reshape2)
library(lme4)
library(brms)
library(tidyr)
library(tidyverse)
library(data.table)
library(janitor)
library(yarrr)
library(knitr)


source("lmedrop.R")
source("myCenter.R")
source("lizCenter.R")
source("summarySEwithin.R")
source("summarySE.R")
source("normDataWithin.R")
source("BF.R")
source("Bf_range.R")
source("Bf_powercalc.R")


theme_set(theme_bw())

Helper functions

SummarySE

This function can be found on the website “Cookbook for R”.

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence intervals (default 95%).

data: a data frame.

measurevar: the name of a column that contains the variable to be summarized

groupvars: a vector containing names of columns that contain grouping variables

na.rm: a boolean that indicates whether to ignore NA’s

conf.interval: the percent range of the confidence interval (default is 95%)

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    require(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column (plyr:: is explicit because dplyr, loaded later, masks rename())
    datac <- plyr::rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval: 
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
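A quick sanity check of the function (illustrative only; ToothGrowth is a dataset that ships with base R):

summarySE(ToothGrowth, measurevar = "len", groupvars = c("supp", "dose"))
# one row per supp x dose cell, with columns N, len, sd, se and ci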

SummarySEwithin

This function can be found on the website “Cookbook for R”.

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions

It summarizes data, handling within-subjects variables by removing inter-subject variability. It will still work if there are no within-subjects variables. It gives count, un-normed mean, normed mean (with the same between-group mean), standard deviation, standard error of the mean, and confidence intervals. If there are within-subjects variables, it calculates adjusted values using the method from Morey (2008).

data: a data frame.

measurevar: the name of a column that contains the variable to be summarized

betweenvars: a vector containing names of columns that are between-subjects variables

withinvars: a vector containing names of columns that are within-subjects variables

idvar: the name of a column that identifies each subject (or matched subjects)

na.rm: a boolean that indicates whether to ignore NA’s

conf.interval: the percent range of the confidence interval (default is 95%)

summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
    FUN=is.factor, FUN.VALUE=logical(1))

  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }

  # Get the means from the un-normed data
  datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
                     na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Drop all the unused columns (these will be calculated with normed data)
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL

  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

  # This is the name of the new column
  measurevar_n <- paste(measurevar, "_norm", sep="")

  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Apply correction from Morey (2008) to the standard error and confidence interval
  #  Get the product of the number of conditions of within-S variables
  nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
                           FUN.VALUE=numeric(1)))
  correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )

  # Apply the correction factor
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor

  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}
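A minimal sketch of how it is called, using a made-up long-format data frame (the column names here are invented for illustration):

toy <- data.frame(subject   = factor(rep(1:5, each = 2)),
                  condition = factor(rep(c("A", "B"), 5)),
                  rt        = rnorm(10, mean = 500, sd = 50))
summarySEwithin(toy, measurevar = "rt", withinvars = "condition", idvar = "subject")
# returns, per condition, the un-normed mean plus Morey-corrected sd, se and ci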

normDataWithin

This function is used by the summarySEwithin function above. It can be found on the website “Cookbook for R”.

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions

From that website: Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.

data: a data frame

idvar: the name of a column that identifies each subject (or matched subjects)

measurevar: the name of a column that contains the variable to be summarized

betweenvars: a vector containing names of columns that are between-subjects variables

na.rm: a boolean that indicates whether to ignore NA’s

normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
    require(plyr)

    # Measure var on left, idvar + between vars on right of formula.
    data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
     .fun = function(xx, col, na.rm) {
        c(subjMean = mean(xx[,col], na.rm=na.rm))
      },
      measurevar,
      na.rm
    )

    # Put the subject means with original data
    data <- merge(data, data.subjMean)

    # Get the normalized data in a new column
    measureNormedVar <- paste(measurevar, "_norm", sep="")
    data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
                               mean(data[,measurevar], na.rm=na.rm)

    # Remove this subject mean column
    data$subjMean <- NULL

    return(data)
}
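A small worked example (toy data invented for illustration):

toy <- data.frame(subject = factor(rep(1:3, each = 2)),
                  rt = c(400, 420, 500, 520, 600, 620))
normDataWithin(toy, idvar = "subject", measurevar = "rt")
# rt_norm recentres every subject on the grand mean (510) while preserving
# within-subject differences, giving 500/520 for all three subjects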

myCenter

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger's blog https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.

From his blog:

-If the input is a numeric variable, the output is the centered variable.

-If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.

-If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).

myCenter = function(x) {
    if (is.numeric(x)) {
        return(x - mean(x, na.rm = T))
    }
    if (is.factor(x)) {
        x = as.numeric(x)
        return(x - mean(x, na.rm = T))
    }
    if (is.data.frame(x) || is.matrix(x)) {
        m = matrix(nrow = nrow(x), ncol = ncol(x))
        colnames(m) = paste("c", colnames(x), sep = "")
        for (i in 1:ncol(x)) {
            m[, i] = myCenter(x[, i])
        }
        return(as.data.frame(m))
    }
}
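Two illustrative calls:

myCenter(c(1, 2, 3))            # numeric input: returns -1 0 1
myCenter(factor(c("a", "b")))   # binary factor: returns -0.5 0.5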

lizCenter

This function provides a wrapper around myCenter, allowing you to center a specific list of variables from a data frame. The input is a data frame (x) and a list of the names of the variables to center (listfname). The output is a copy of the data frame with a numeric column added for each centered variable, each labelled with its previous name with ".ct" appended. For example, if x is a data frame with columns "a" and "b", lizCenter(x, list("a", "b")) will return a data frame with two additional columns, a.ct and b.ct, which are numeric, centered codings of the corresponding variables.

lizCenter= function(x, listfname) 
{
    for (i in 1:length(listfname)) 
    {
        fname = as.character(listfname[i])
        x[paste(fname,".ct", sep="")] = myCenter(x[fname])
    }
        
    return(x)
}
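A small worked example (toy data invented for illustration):

toy <- data.frame(a = factor(c("x", "y", "x", "y")), b = c(1, 2, 3, 4))
lizCenter(toy, list("a", "b"))
# adds numeric columns a.ct (-0.5, 0.5, -0.5, 0.5) and b.ct (-1.5, -0.5, 0.5, 1.5)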

get_coeffs

This function allows us to inspect particular coefficients from the output of an lme4 model by putting them in a table.

x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)

list: a list of the names of the coefficients to be extracted (e.g. c(“variable1”, “variable1:variable2”))

get_coeffs <- function(x,list){(as.data.frame(summary(x)$coefficients)[list,])}
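A hedged sketch of typical use (the model and coefficient names below are invented):

# m <- glmer(correct ~ condition.ct + (1 | participant), data = dat, family = binomial)
# get_coeffs(m, c("(Intercept)", "condition.ct"))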

Bf

This function is equivalent to the Dienes (2008) calculator which can be found here: http://www.lifesci.sussex.ac.uk/home/Zoltan_Dienes/inference/Bayes.htm.

The code was provided by Baguley and Kaye (2010) and can be found here: http://www.academia.edu/427288/Review_of_Understanding_psychology_as_a_science_An_introduction_to_scientific_and_statistical_inference

Bf<-function(sd, obtained, uniform, lower=0, upper=1, meanoftheory=0,sdtheory=1, tail=1){
 area <- 0
 if(identical(uniform, 1)){
  theta <- lower
  range <- upper - lower
  incr <- range / 2000
  for (A in -1000:1000){
     theta <- theta + incr
     dist_theta <- 1 / range
     height <- dist_theta * dnorm(obtained, theta, sd)
     area <- area + height * incr
  }
 }else
   {theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (A in -1000:1000){
      theta <- theta + incr
      dist_theta <- dnorm(theta, meanoftheory, sdtheory)
      if(identical(tail, 1)){
        if (theta <= 0){
          dist_theta <- 0
        } else {
          dist_theta <- dist_theta * 2
        }
      }
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
 }
 LikelihoodTheory <- area
 Likelihoodnull <- dnorm(obtained, 0, sd)
 BayesFactor <- LikelihoodTheory / Likelihoodnull
 ret <- list("LikelihoodTheory" = LikelihoodTheory,"Likelihoodnull" = Likelihoodnull, "BayesFactor" = BayesFactor)
 ret
} 
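An illustrative call (the numbers are made up): an obtained effect of 0.5 with SE 0.2, tested against a half-normal H1 with mean 0 and SD 1.

Bf(sd = 0.2, obtained = 0.5, uniform = 0, meanoftheory = 0, sdtheory = 1, tail = 1)
# returns a list with LikelihoodTheory, Likelihoodnull and BayesFactor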

Bf power calculation

This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, and a value for the predicted mean, which is set to be sdtheory with meanoftheory = 0), plus the current number of participants N. However, rather than returning a BF for the current sample, it works out what the BF would be for a range of different sample sizes (assuming that the SE scales with sqrt(N)).

Bf_powercalc<-function(sd, obtained, uniform, lower=0, upper=1, meanoftheory=0, sdtheory=1, tail=2, N, min, max)
{
  
  x = c(0)
  y = c(0)

# note: work out the difference between N and df (for a contrast between two groups this is 2; for contrasts where there are 4 groups it will be 3, etc.)

  for(newN in min : max)
  {
    B = as.numeric(Bf(sd = sd*sqrt(N/newN), obtained, uniform, lower, upper, meanoftheory, sdtheory, tail)[3])
    x= append(x,newN) 
    y= append(y,B)
    output = cbind(x,y)
    
  } 
  output = output[-1,] 
  return(output) 
}
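A sketch of how it might be called, continuing the made-up numbers from the Bf example above: a current sample of N = 30, projected out to samples of 20 to 60 participants.

Bf_powercalc(sd = 0.2, obtained = 0.5, uniform = 0, meanoftheory = 0,
             sdtheory = 1, tail = 1, N = 30, min = 20, max = 60)
# returns a two-column matrix of sample size (x) and the projected BF (y)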

Bf range

This works with the Bf function above. It requires the obtained mean and SE for the current sample and works out what the BF would be for a range of predicted means (which are set to be sdtheoryrange (with meanoftheory=0)).

Bf_range<-function(sd, obtained, meanoftheory=0, sdtheoryrange, tail=1)
{
  
  x = c(0)
  y = c(0)
  
  for(sdi in sdtheoryrange)
  {
    B = as.numeric(Bf(sd, obtained, uniform = 0, meanoftheory = meanoftheory, sdtheory = sdi, tail = tail)[3])  # pass tail by name so it is not matched positionally to 'lower'
    
    x= append(x,sdi)  
    y= append(y,B)
    output = cbind(x,y)
    
  } 
  output = output[-1,] 
  colnames(output) = c("sdtheory", "BF")
  return(output) 
}
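A sketch of how it might be called with the same made-up numbers, scanning H1 SDs from 0.1 to 1:

Bf_range(sd = 0.2, obtained = 0.5, meanoftheory = 0,
         sdtheoryrange = seq(0.1, 1, by = 0.1), tail = 1)
# returns a two-column matrix of sdtheory values and the corresponding BFs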

Experiment 1

Load data

# Experiment 1 - verb study with adults

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp1_production_data.csv")

combined_judgment_data.df <- read.csv("exp1_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)


#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)


entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)


#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct)  #actually, all NAs here
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in pre-emption
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)


preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)

Preregistered data analyses

Question 1: Have participants picked up on the difference in meaning between the two argument-structure constructions?

Production data

#Figure 2
RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))
RQ1_graph_productions.df = subset(RQ1_graph_productions.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ verb_type_training2 + participant_private_id, RQ1_graph_productions.df, FUN=mean)

aggregated.graph1 <- rename(aggregated.graph1, verb = verb_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = correct  ~ verb,
                  data = aggregated.graph1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "% semantically correct",
                  cex.lab = 1,
                  cex.axis = 1,
                  cex.names = 1,
                  yaxt = "n")

axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)

#1 alternating verb production

alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "alternating")

#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, alternating_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.945
# average accuracy separately for causative and inchoative scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.971         0.919
# maximally vague priors for the intercept and the predictors
a = lizCenter(alternating_prod.df, list("transitivity_test_scene2"))  

alternating_model <-brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct" ))
##                                 Estimate Est.Error      Q2.5     Q97.5
## b_Intercept                    3.1770071 0.3603913  2.548108 3.9546609
## b_transitivity_test_scene2.ct -0.7616193 0.5237081 -1.809490 0.2852947
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##    c(C1, C2)
## 1 0.00000000
## 2 0.07058333
# no difference between construction 1 and construction 2

# Final model
# maximally vague priors for the intercept 
alternating_model_final = brm(formula = semantically_correct~1 + (1|participant_private_id), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.025824 0.3398678 2.449511 3.778411
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel verb production

novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "novel")

#filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, novel_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.955
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.946         0.964
b = lizCenter(novel_prod.df, list("transitivity_test_scene2"))  

# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct"))
##                                Estimate Est.Error      Q2.5    Q97.5
## b_Intercept                   3.4808362 0.3986055  2.773712 4.338027
## b_transitivity_test_scene2.ct 0.0422667 0.5993274 -1.145418 1.217504
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1    0.0000
## 2    0.4675
# no difference between construction 1 and construction 2  
# Final model

# maximally vague priors for the intercept 
novel_model_final <- brm(formula = semantically_correct~1+ (1|participant_private_id), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error    Q2.5    Q97.5
## b_Intercept 3.230354 0.3633264 2.59911 4.011721
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0

Judgment data

#Figure 3
RQ1_graph_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))

# aggregated dataframe for means
aggregated.graph2 = aggregate(response ~ verb_type_training2 + semantically_correct + participant_private_id, RQ1_graph_judgments.df, FUN=mean)
aggregated.graph2$semantically_correct <- recode(aggregated.graph2$semantically_correct, "1" = "yes","0" = "no")

aggregated.graph2 <- rename(aggregated.graph2, verb = verb_type_training2,
                                           correct = semantically_correct)

yarrr::pirateplot(formula = response ~ correct + verb,
                  data = aggregated.graph2,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

#1 alternating verb judgments

alternating_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "alternating")

# aggregated dataframe for means
aggregated.means_alternating_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, alternating_judgments.df, FUN=mean)
aggregated.means_alternating_judgments$semantically_correct<- recode(aggregated.means_alternating_judgments$semantically_correct, "1" = "yes","0" = "no")
aggregated.means_alternating_judgments$transitivity_test_scene2<- recode(aggregated.means_alternating_judgments$transitivity_test_scene2, "construction1" = "transitive causative","construction2" = "intransitive inchoative")

# average accuracy for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_alternating_judgments$response, aggregated.means_alternating_judgments$semantically_correct, mean),3)
##    no   yes 
## 2.506 4.837
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_alternating_judgments$response, list(aggregated.means_alternating_judgments$semantically_correct, aggregated.means_alternating_judgments$transitivity_test_scene2), mean),3)
##     transitive causative intransitive inchoative
## no                 2.512                   2.500
## yes                4.779                   4.895
c = lizCenter(alternating_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(alternating_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct", "b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                         Estimate  Est.Error
## b_Intercept                                           3.68325717 0.07829645
## b_transitivity_test_scene2.ct                         0.05087355 0.06731043
## b_semantically_correct.ct                             2.29388857 0.12843963
## b_transitivity_test_scene2.ct:semantically_correct.ct 0.12855771 0.16117333
##                                                             Q2.5     Q97.5
## b_Intercept                                            3.5298523 3.8387821
## b_transitivity_test_scene2.ct                         -0.0824319 0.1833776
## b_semantically_correct.ct                              2.0396688 2.5445500
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.1901475 0.4411337
mcmc_plot(alternating_model_judgments, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.2200833
## 3         0.0000000
## 4         0.2064167
# no difference between construction 1 and construction 2

# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
mcmc_plot(alternating_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
#2 novel verb judgments

novel_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "novel")

# aggregated dataframe for means
aggregated.means_novel_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, novel_judgments.df, FUN=mean)


# average accuracy for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_novel_judgments$response, aggregated.means_novel_judgments$semantically_correct, mean),3)
##     0     1 
## 2.250 4.174
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_judgments$response, list(aggregated.means_novel_judgments$semantically_correct, aggregated.means_novel_judgments$transitivity_test_scene2), mean),3)
##   construction1 construction2
## 0         2.302         2.198
## 1         4.221         4.128
d = lizCenter(novel_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  

# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct", "b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                          Estimate  Est.Error
## b_Intercept                                            3.20322904 0.11078779
## b_transitivity_test_scene2.ct                         -0.09885838 0.09576321
## b_semantically_correct.ct                              1.87265986 0.16068903
## b_transitivity_test_scene2.ct:semantically_correct.ct  0.01173567 0.20162472
##                                                             Q2.5      Q97.5
## b_Intercept                                            2.9870587 3.42037300
## b_transitivity_test_scene2.ct                         -0.2857789 0.08920161
## b_semantically_correct.ct                              1.5491262 2.18477274
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.3748625 0.41433687
mcmc_plot(novel_model_judgments, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.1516667
## 3         0.0000000
## 4         0.4774167
# no difference between construction 1 and construction 2
# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
mcmc_plot(novel_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0

Question 2: Does statistical pre-emption constrain verb argument construction generalizations in adults (judgment data)?

#Figure 4

#first, filter out semantically incorrect trials

judgments_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel
judgments_novel.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items
judgment_unattested_constr1.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)

aggregated.means = aggregate(response ~ condition + restricted_verb + participant_private_id, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_verb)

yarrr::pirateplot(formula = response ~ restricted + condition,
                  data = aggregated.means,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

judgment_unattested_novel_preemption.df <- subset(judgment_unattested_novel.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, judgment_unattested_novel_preemption.df$restricted_verb, mean),3)
##    no   yes 
## 3.026 2.362
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_preemption_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept           2.6909335 0.09947207  2.4976072  2.8858104
## b_restricted_verb.ct -0.6461515 0.16711744 -0.9752221 -0.3163207
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         1
# BF analyses: we use the difference between attested and unattested in the pilot study reported at rpubs.com/AnnaSamara/333562 as the maximum difference we could expect when comparing ratings for unattested vs. novel constructions (SD = 3.15/2)
Bf(0.17, 0.65, uniform = 0, meanoftheory = 0, sdtheory = 3.15/2, tail = 1)
## $LikelihoodTheory
## [1] 0.4629748
## 
## $Likelihoodnull
## [1] 0.001570015
## 
## $BayesFactor
## [1] 294.8856
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 0.65, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# RRs for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4

Question 3: Does statistical entrenchment constrain verb argument construction generalizations in adults (judgment data)?

#first, filter out semantically incorrect trials
entrenchment_judgment_unattested_novel.df <- subset(entrenchment_judgment.df, semantically_correct == "1")   

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_verb <- factor(entrenchment_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, entrenchment_judgment_unattested_novel.df$restricted_verb, mean),3)
##   yes    no 
## 4.552 4.174
#Center variables of interest using the lizCenter function:
d_unattested_novel_entrenchment = lizCenter(entrenchment_judgment_unattested_novel.df , list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel_entrenchment, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate Est.Error       Q2.5       Q97.5
## b_Intercept           4.3689660 0.1125748  4.1454265  4.58699507
## b_restricted_verb.ct -0.3641384 0.1657520 -0.6885095 -0.03824153
mcmc_plot(judgments_entrenchment_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

# the effect is in the opposite direction

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9849167
# drawing a max based on the difference between attested vs. unattested in this experiment 

# BF analyses: we use the difference between attested and unattested in this study (attested > unattested provides supporting evidence for entrenchment) as a maximum we expect when comparing ratings for unattested vs. novel constructions (SD = 0.38/2)
Bf(0.16, -0.36, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 0.0482932
## 
## $Likelihoodnull
## [1] 0.1983728
## 
## $BayesFactor
## [1] 0.2434466
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.16, -0.36, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# RRs for which BF <1/3
ev_for_h0 <- subset(data.frame(range_test), BF < 1/3)
low_threshold <- min(ev_for_h0$sdtheory)
high_threshold <- max(ev_for_h0$sdtheory)
print(low_threshold)
## [1] 0
print(high_threshold)
## [1] 4

Question 4: Is the effect of statistical pre-emption larger than entrenchment (judgment data)?

#first, filter out semantically incorrect trials
all_judgment_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

all_judgment_novel.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_verb <- factor(all_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_verb, all_judgment_unattested_novel.df$condition), mean),3)
##     entrenchment preemption
## yes        4.552      2.362
## no         4.174      3.026
#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_verb", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_verb.ct", "b_restricted_verb.ct:condition.ct"))
##                                    Estimate  Est.Error       Q2.5     Q97.5
## b_Intercept                       3.2897355 0.07880961 3.13112571 3.4439927
## b_restricted_verb.ct              0.2852059 0.12044918 0.04663904 0.5220161
## b_restricted_verb.ct:condition.ct 0.9953237 0.22753434 0.53935137 1.4405830
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))

C1=mean(samps[,"b_restricted_verb.ct"] > 0)
C2=mean(samps[,"b_restricted_verb.ct:condition.ct"] > 0) 
C3=mean(samps[,"b_Intercept"] > 0)

pMCMC=as.data.frame(c(C1,C2,C3))
pMCMC
##   c(C1, C2, C3)
## 1     0.9903333
## 2     1.0000000
## 3     1.0000000
#roughly predicted effect size from adult pilot study was 2.91. Use it as max for unattested vs. novel (SD = 2.91/2)
Bf(0.22, 0.99, uniform = 0, meanoftheory = 0, sdtheory = 2.91/2, tail = 1)
## $LikelihoodTheory
## [1] 0.4323971
## 
## $Likelihoodnull
## [1] 7.265337e-05
## 
## $BayesFactor
## [1] 5951.508
H1RANGE = seq(0,4,by=0.01) # [5-1]-[0] - max effect of preemption minus no effect of entrenchment
range_test <- Bf_range(0.22, 0.99, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

# Figure 5
judgments_unattested_attested.df <- subset(combined_judgment_data.df, semantically_correct == "1")   
judgments_unattested_attested.df <- subset(judgments_unattested_attested.df, restricted_verb == "yes")   


aggregated.means1 = aggregate(response ~ condition + attested_unattested + participant_private_id, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)

aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")


yarrr::pirateplot(formula = response ~  attested  + condition,
                  data = aggregated.means1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested$response, attested_vs_unattested$attested_unattested, mean),3)
##     0     1 
## 2.362 4.952
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))


# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept              3.674384 0.05864062 3.559855 3.791007
## b_attested_unattested.ct 2.549248 0.12006074 2.313172 2.786830
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
# this prior (5.00-1.85 = 3.15) is drawn from previous pilot study with 10 adults in preemption that was preregistered at https://rpubs.com/AnnaSamara/333562
Bf(0.12, 2.55, uniform = 0, meanoftheory = 0, sdtheory = 3.15, tail = 1)
## $LikelihoodTheory
## [1] 0.1824811
## 
## $Likelihoodnull
## [1] 2.92535e-98
## 
## $BayesFactor
## [1] 6.237925e+96
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.12, 2.55, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_ent$response, attested_vs_unattested_ent$attested_unattested, mean),3)
##     0     1 
## 4.552 4.942
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))


# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                           Estimate  Est.Error      Q2.5     Q97.5
## b_Intercept              4.7500078 0.06281272 4.6259234 4.8727819
## b_attested_unattested.ct 0.3819432 0.12580665 0.1329204 0.6327987
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##     c(C1, C2)
## 1 0.000000000
## 2 0.001666667
# we preregistered that the max effect of entrenchment here would be 1 based on adult data suggesting difference never more than 1 with novel verbs, i.e. SD = 0.5
Bf(0.13, 0.38, uniform = 0, meanoftheory = 0, sdtheory = 0.5, tail = 1)
## $LikelihoodTheory
## [1] 1.175537
## 
## $Likelihoodnull
## [1] 0.04281328
## 
## $BayesFactor
## [1] 27.45731
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.13, 0.38, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
##                  0     1
## entrenchment 4.552 4.942
## preemption   2.362 4.952
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))


# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept","b_condition.ct", "b_attested_unattested.ct","b_attested_unattested.ct:condition.ct"))
##                                        Estimate  Est.Error      Q2.5      Q97.5
## b_Intercept                            4.055598 0.04397886  3.969816  4.1415247
## b_condition.ct                        -1.052575 0.08443952 -1.215653 -0.8863255
## b_attested_unattested.ct               1.782584 0.08963051  1.606869  1.9573967
## b_attested_unattested.ct:condition.ct  2.114945 0.17132897  1.779799  2.4520575
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_condition.ct"] < 0)
C3=mean(samps[,"b_attested_unattested.ct"] < 0) 
C4=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1                 0
## 2                 1
## 3                 0
## 4                 0
#roughly predicted effect size from adult pilot study: 2.91
Bf(0.17, 2.11, uniform = 0, meanoftheory = 0, sdtheory = 2.91, tail = 1)
## $LikelihoodTheory
## [1] 0.210635
## 
## $Likelihoodnull
## [1] 8.289253e-34
## 
## $BayesFactor
## [1] 2.541061e+32
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 2.11, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

#Figure 6

data_long <- gather(preemption_production.df, det_type, produced, det1:none, factor_key=TRUE)

p = ggplot(data_long, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw()+
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "periphrastic causative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "periphrastic causative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p

# Are participants producing more attested than unattested dets? We will now compare the proportion of attested dets (that's the intercept) for the restricted verbs against chance

production_preemption_attested_unattested.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_verb =="yes")

round(tapply(production_preemption_attested_unattested.df$attested_unattested, production_preemption_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.994         0.994
production_preemption_attested_unattested.df$verb_type_training2 <- factor(production_preemption_attested_unattested.df$verb_type_training2)

df_prod = lizCenter(production_preemption_attested_unattested.df , list("verb_type_training2"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_verb_type_training2.ct"))
##                             Estimate Est.Error      Q2.5    Q97.5
## b_Intercept              4.677061043 0.4179287  3.940341 5.574792
## b_verb_type_training2.ct 0.003210755 0.6338915 -1.253183 1.224894
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.4943333
# same analyses without verb_type_training2

# maximally vague priors for the intercept
prod_attested_unattested_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 4.523368 0.4014143 3.830682 5.400971
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)


# We will now compare unattested forms for restricted vs. novel verbs:
# do participants produce the unwitnessed form less for the restricted verbs than for the novel verb?


production_preemption_restricted_novel.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df<- subset(production_preemption_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we are going to randomly set all det1s as attested and all det2s as unattested

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)

round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.994         0.994         0.470
round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.470 0.994
production_preemption_restricted_novel.df$restricted_verb <- factor(production_preemption_restricted_novel.df$restricted_verb)
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_verb"))

# maximally vague priors for the predictors and the intercept
prod_unattested_novel_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_preemption_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                      Estimate Est.Error     Q2.5    Q97.5
## b_Intercept          3.249545 0.3480186 2.623397 3.994797
## b_restricted_verb.ct 3.792800 0.6639768 2.411906 5.020768
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 0.000000e+00
## 2 8.333333e-05

Production data: Effect of statistical entrenchment

# Figure 7
data_long_e <- gather(entrenchment_production.df, det_type, produced, det1:none, factor_key=TRUE)
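# gather() is superseded in current tidyr; an equivalent using pivot_longer()
# (same long-format result, with det_type as a factor as factor_key = TRUE gives):
# data_long_e <- pivot_longer(entrenchment_production.df, det1:none,
#                             names_to = "det_type", values_to = "produced")
# data_long_e$det_type <- factor(data_long_e$det_type,
#                                levels = c("det1", "det2", "other", "none"))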

data_long_e$transitivity_test_scene2 <-recode(data_long_e$transitivity_test_scene2, "construction1" = "test: transitive causative","construction2" = "test: intransitive inchoative")


p = ggplot(data_long_e, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(panel.grid.major = element_blank()) +
  facet_grid("transitivity_test_scene2") +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "intransitive inchoative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "intransitive inchoative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p

#a. Are participants producing more attested than unattested dets?
# here, we want to see how often participants produce the unattested det, e.g. the transitive-only det1
# for an intransitive-only (det2) verb in the intransitive condition at test, and vice versa

production_entrenchment_attested_unattested.df  <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_verb =="yes")

#We want to compare attested vs. unattested trials for transitive verbs in the intransitive inchoative construction at test
production_entrenchment_attested_unattested1.df  <- subset(production_entrenchment_attested_unattested.df, verb_type_training2 == "construction1" & transitivity_test_scene2 == "construction2")

#And intransitive inchoative verbs in the transitive construction at test. Filter out irrelevant trials
production_entrenchment_attested_unattested2.df  <- subset(production_entrenchment_attested_unattested.df, verb_type_training2 == "construction2" & transitivity_test_scene2 == "construction1")


production_entrenchment_attested_unattested.df <- rbind(production_entrenchment_attested_unattested1.df, production_entrenchment_attested_unattested2.df)
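# The two-subset-and-rbind step above could equivalently be written as a single
# filter on the restricted-verb dataframe before it is overwritten:
# subset(production_entrenchment_attested_unattested.df,
#        (verb_type_training2 == "construction1" & transitivity_test_scene2 == "construction2") |
#        (verb_type_training2 == "construction2" & transitivity_test_scene2 == "construction1"))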

#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.148
# and separately for each verb type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.174         0.122
production_entrenchment_attested_unattested.df$verb_type_training2 <- factor(production_entrenchment_attested_unattested.df$verb_type_training2)
df_prod_ent = lizCenter((production_entrenchment_attested_unattested.df), list("verb_type_training2"))  


# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept","b_verb_type_training2.ct"))
##                            Estimate Est.Error      Q2.5      Q97.5
## b_Intercept              -2.7267163 0.4927206 -3.725223 -1.7742433
## b_verb_type_training2.ct -0.6347772 0.4665261 -1.546643  0.3005695
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   1.00000
## 2   0.08275
#same analysis without verb_type_training2


# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

summary(prod_attested_unattested_ent_final, WAIC=T)
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: attested_unattested ~ 1 + (1 | participant_private_id) 
##    Data: df_prod_ent (Number of observations: 344) 
##   Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
##          total post-warmup draws = 12000
## 
## Group-Level Effects: 
## ~participant_private_id (Number of levels: 43) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.83      0.62     1.85     4.30 1.00     3731     5237
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -2.65      0.47    -3.59    -1.75 1.00     5371     6921
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept"))
##              Estimate Est.Error      Q2.5     Q97.5
## b_Intercept -2.646307 0.4692773 -3.592392 -1.752829
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 1
# c. We will now compare unattested productions for restricted vs. novel verbs.
# Do participants produce the unwitnessed form less for the two non-alternating verbs than for the novel verb? (Presumably the “unwitnessed” form has to be set arbitrarily here.)


production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we arbitrarily code all det1s as attested and all det2s as unattested

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)

# select trials featuring the novel verb in the intransitive inchoative construction
production_entrenchment_restricted_novel1.df <- subset(production_entrenchment_restricted_novel.df, verb_type_training2 == "novel"  & transitivity_test_scene2 == "construction2")


# Select trials featuring transitive verbs in the intransitive inchoative construction at test
production_entrenchment_restricted_novel2.df  <- subset(production_entrenchment_restricted_novel.df, verb_type_training2 == "construction1" & transitivity_test_scene2 == "construction2")

# Select trials featuring intransitive verbs in the transitive construction at test
production_entrenchment_restricted_novel3.df  <- subset(production_entrenchment_restricted_novel.df, verb_type_training2 == "construction2" & transitivity_test_scene2 == "construction1")


production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.174         0.122         0.036
# reverse the coding to focus on unattested rather than attested productions for novel vs. restricted
production_entrenchment_restricted_novel.df$attested_unattested <- recode(production_entrenchment_restricted_novel.df$attested_unattested, `1` = 0L, `0` = 1L)
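# A simpler equivalent of the recode() call, since the column is 0/1:
# production_entrenchment_restricted_novel.df$attested_unattested <-
#   1 - production_entrenchment_restricted_novel.df$attested_unattested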


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.964 0.852
# i.e., participants produce *unattested forms* less often for the restricted verbs (0.852) than for the novel verb (0.964)

production_entrenchment_restricted_novel.df$restricted_verb <- factor(production_entrenchment_restricted_novel.df$restricted_verb)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_verb"))


# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                       Estimate Est.Error      Q2.5     Q97.5
## b_Intercept           3.138692 0.3967049  2.403408 3.9618964
## b_restricted_verb.ct -0.711761 0.6243892 -1.980648 0.4770178
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1    0.0000
## 2    0.1215

Experiment 2

Load data

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp2_production_data.csv")

combined_judgment_data.df <- read.csv("exp2_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)


#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
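# A compact equivalent of the four ifelse() calls above (same 0/1 indicator columns):
# codes <- c(det1 = "det_construction1", det2 = "det_construction2",
#            other = "other", none = "none")
# for (nm in names(codes)) {
#   entrenchment_production.df[[nm]] <-
#     as.integer(entrenchment_production.df$det_lenient_adapted == codes[[nm]])
# }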


entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)


#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct)  #actually, all NAs here
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in pre-emption
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)


preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)

Preregistered data analyses

Question 1: Have participants picked up on the difference in meaning between the two argument-structure constructions?

Production data

#Figure 8

# note the parentheses: & binds more tightly than |, so without them the condition filter would not apply to the novel-verb rows
RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))
RQ1_graph_productions.df = subset(RQ1_graph_productions.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ verb_type_training2 + participant_private_id, RQ1_graph_productions.df, FUN=mean)

aggregated.graph1 <- rename(aggregated.graph1, verb = verb_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = correct  ~ verb,
                  data = aggregated.graph1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "% semantically correct",
                  cex.lab = 1,
                  cex.axis = 1,
                  cex.names = 1,
                  yaxt = "n")

axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)

#1 alternating verb production

alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "alternating")

#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, alternating_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.975
# average accuracy separately for causative and inchoative scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.963         0.988
a = lizCenter(alternating_prod.df, list("transitivity_test_scene2"))   

# maximally vague priors for the intercept and the predictors
alternating_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct" ))
##                                Estimate Est.Error       Q2.5    Q97.5
## b_Intercept                   3.6294500 0.3743762  2.9686052 4.446438
## b_transitivity_test_scene2.ct 0.5144216 0.5860461 -0.6409761 1.665358
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00000
## 2   0.81175
# no difference between construction 1 and construction 2

# Final model
# maximally vague priors for the intercept 
alternating_model_final = brm(formula = semantically_correct~1 + (1|participant_private_id), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.444666 0.3329475 2.850137 4.158513
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel verb production

novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "novel")

#and filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, novel_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.96
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.975         0.946
b = lizCenter(novel_prod.df, list("transitivity_test_scene2"))  


# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct"))
##                                 Estimate Est.Error      Q2.5     Q97.5
## b_Intercept                    3.6280652 0.4340609  2.851709 4.5649594
## b_transitivity_test_scene2.ct -0.3118269 0.6120362 -1.494837 0.9021442
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.7016667
# no difference between construction 1 and construction 2  
# Final model

# maximally vague priors for the intercept 
novel_model_final <- brm(formula = semantically_correct~1 + (1|participant_private_id), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.488439 0.4127628 2.761845 4.379939
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0

Judgment data

#Figure 9
# as above, parentheses are needed around the | so the condition filter applies to both verb types
RQ1_graph_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))

# aggregated dataframe for means
aggregated.graph2 = aggregate(response ~ verb_type_training2+ semantically_correct + participant_private_id, RQ1_graph_judgments.df, FUN=mean)
aggregated.graph2$semantically_correct <- recode(aggregated.graph2$semantically_correct, "1" = "yes","0" = "no")

aggregated.graph2 <- rename(aggregated.graph2, verb = verb_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = response ~ correct + verb,
                  data = aggregated.graph2,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

alternating_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "alternating")

#1 alternating verb judgments

# aggregated dataframe for means
aggregated.means_alternating_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, alternating_judgments.df, FUN=mean)

# average rating for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_alternating_judgments$response, aggregated.means_alternating_judgments$semantically_correct, mean),3)
##     0     1 
## 2.294 4.775
# average rating separately for causative and noncausative scenes
round(tapply(aggregated.means_alternating_judgments$response, list(aggregated.means_alternating_judgments$semantically_correct, aggregated.means_alternating_judgments$transitivity_test_scene2), mean),3)
##   construction1 construction2
## 0         2.350         2.237
## 1         4.812         4.737
c = lizCenter(alternating_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct","b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                          Estimate  Est.Error
## b_Intercept                                            3.54574403 0.07034315
## b_transitivity_test_scene2.ct                         -0.09220617 0.07919043
## b_semantically_correct.ct                              2.43114857 0.14006444
## b_transitivity_test_scene2.ct:semantically_correct.ct  0.03816703 0.15936784
##                                                             Q2.5      Q97.5
## b_Intercept                                            3.4081144 3.68443678
## b_transitivity_test_scene2.ct                         -0.2475512 0.06479614
## b_semantically_correct.ct                              2.1525582 2.70409415
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.2748796 0.35411775
samps = as.matrix(as.mcmc(alternating_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.1210833
## 3         0.0000000
## 4         0.4032500
# no difference between construction 1 and construction 2

# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(alternating_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
##                           Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept               3.548205 0.07092643 3.409035 3.688373
## b_semantically_correct.ct 2.431656 0.14440410 2.140132 2.714848
mcmc_plot(alternating_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
#2 novel verb judgments

novel_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "novel")

# aggregated dataframe for means
aggregated.means_novel_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, novel_judgments.df, FUN=mean)

# average rating for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_novel_judgments$response, aggregated.means_novel_judgments$semantically_correct, mean),3)
##     0     1 
## 2.163 3.944
# average rating separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_judgments$response, list(aggregated.means_novel_judgments$semantically_correct, aggregated.means_novel_judgments$transitivity_test_scene2), mean),3)
##   construction1 construction2
## 0         2.188         2.138
## 1         3.938         3.950
d = lizCenter(novel_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  


# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct","b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                          Estimate  Est.Error
## b_Intercept                                            3.04224545 0.10894632
## b_transitivity_test_scene2.ct                         -0.02153686 0.09462807
## b_semantically_correct.ct                              1.72216055 0.18015735
## b_transitivity_test_scene2.ct:semantically_correct.ct  0.05993910 0.18645361
##                                                             Q2.5     Q97.5
## b_Intercept                                            2.8281014 3.2526802
## b_transitivity_test_scene2.ct                         -0.2070896 0.1647077
## b_semantically_correct.ct                              1.3574122 2.0665965
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.3023050 0.4295915
mcmc_plot(novel_model_judgments, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.4053333
## 3         0.0000000
## 4         0.3715000
# no difference between construction 1 and construction 2
# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(novel_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
##                           Estimate Est.Error     Q2.5    Q97.5
## b_Intercept               3.040259 0.1120357 2.817493 3.260148
## b_semantically_correct.ct 1.716845 0.1839560 1.346918 2.076527
mcmc_plot(novel_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0

Question 2: Does statistical pre-emption constrain verb argument construction generalizations in adults (judgment data)?

#Figure 10

#first, filter out semantically incorrect trials

judgments_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

judgments_novel.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

judgment_unattested_constr1.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)

aggregated.means = aggregate(response ~ condition + restricted_verb + participant_private_id, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_verb)

yarrr::pirateplot(formula = response ~ restricted + condition,
                  data = aggregated.means,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

judgment_unattested_novel_preemption.df <- subset(judgment_unattested_novel.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, judgment_unattested_novel_preemption.df$restricted_verb, mean),3)
##    no   yes 
## 2.644 2.259
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(judgments_preemption_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate  Est.Error       Q2.5       Q97.5
## b_Intercept           2.4479803 0.09123043  2.2707631  2.62902314
## b_restricted_verb.ct -0.3781125 0.18331196 -0.7387568 -0.01734214
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9798333
# BF analyses: we use the difference between attested and unattested ratings in Experiment 1 (SD = 0.65) as an estimate of the difference we expect when comparing ratings for unattested vs. novel constructions
Bf(0.18, 0.37, uniform = 0, meanoftheory = 0, sdtheory = 0.65, tail = 1)
## $LikelihoodTheory
## [1] 0.9929748
## 
## $Likelihoodnull
## [1] 0.267993
## 
## $BayesFactor
## [1] 3.705227
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.18, 0.37, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# RRs for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.14
print(high_threshold)
## [1] 0.87
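# Bf() and Bf_range() are sourced helpers (BF.R and Bf_range.R; not shown here).
# A minimal sketch of the Dienes-style calculation Bf() presumably performs with
# a half-normal H1 (tail = 1); illustration only, not the sourced implementation:
bf_sketch <- function(sd, obtained, meanoftheory = 0, sdtheory = 1) {
  theta <- seq(meanoftheory, meanoftheory + 5 * sdtheory, length.out = 2000)
  step  <- theta[2] - theta[1]
  prior <- 2 * dnorm(theta, meanoftheory, sdtheory)          # half-normal density
  likelihood_theory <- sum(dnorm(obtained, theta, sd) * prior) * step
  likelihood_null   <- dnorm(obtained, 0, sd)                # likelihood at H0
  likelihood_theory / likelihood_null
}
# bf_sketch(0.18, 0.37, sdtheory = 0.65)  # ~3.7, in line with the Bf() output above
# Bf_range() presumably applies the same computation across sdtheoryrange, e.g.
# sapply(H1RANGE[-1], function(s) bf_sketch(0.18, 0.37, sdtheory = s))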

Question 3: Does statistical entrenchment constrain verb argument construction generalizations in adults (judgment data)?

#first, filter out semantically incorrect trials

entrenchment_judgment_unattested_novel.df <- subset(entrenchment_judgment.df, semantically_correct == "1")   

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_verb <- factor(entrenchment_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, entrenchment_judgment_unattested_novel.df$restricted_verb, mean),3)
##   yes    no 
## 4.356 3.944
#Center variables of interest using the lizCenter function:
d_unattested_novel_entrenchment = lizCenter(entrenchment_judgment_unattested_novel.df, list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel_entrenchment, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate Est.Error      Q2.5       Q97.5
## b_Intercept           4.1555352 0.1264424  3.912003  4.40360369
## b_restricted_verb.ct -0.3992576 0.1730160 -0.732444 -0.05591938
mcmc_plot(judgments_entrenchment_model, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9880833
# we scale H1 using half the attested vs. unattested difference in this experiment (0.38/2), a comparison which gave significant evidence for entrenchment
Bf(0.17, -0.40, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 0.03663446
## 
## $Likelihoodnull
## [1] 0.1473201
## 
## $BayesFactor
## [1] 0.2486726
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, -0.40, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF < 1/3
ev_for_h0 <- subset(data.frame(range_test), BF < 1/3)
low_threshold <- min(ev_for_h0$sdtheory)
high_threshold <- max(ev_for_h0$sdtheory)
print(low_threshold)
## [1] 0
print(high_threshold)
## [1] 4

Question 4: Is the effect of statistical pre-emption larger than entrenchment (judgment data)?

#first, filter out semantically incorrect trials

all_judgment_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

all_judgment_novel.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_verb <- factor(all_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_verb, all_judgment_unattested_novel.df$condition), mean),3)
##     entrenchment preemption
## yes        4.356      2.259
## no         3.944      2.644
# pre-emption lowered ratings for the restricted verbs as predicted, whereas entrenchment showed the opposite pattern

#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_verb", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_verb.ct","b_condition.ct", "b_restricted_verb.ct:condition.ct"))
##                                     Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept                        3.0259749 0.08066798  2.8700799  3.1845857
## b_restricted_verb.ct               0.1145929 0.13034168 -0.1394403  0.3732859
## b_condition.ct                    -1.6621060 0.14728154 -1.9478254 -1.3691556
## b_restricted_verb.ct:condition.ct  0.7634428 0.24575061  0.2796771  1.2401513
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))

C1=mean(samps[,"b_Intercept"] < 0) 
C2=mean(samps[,"b_restricted_verb.ct"] > 0)
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_restricted_verb.ct:condition.ct"] > 0) 


pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1           0.00000
## 2           0.81175
## 3           0.00000
## 4           0.99850
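# Rough descriptive check of the interaction: the restricted-minus-novel rating
# difference is 4.356 - 3.944 = 0.41 under entrenchment but 2.259 - 2.644 = -0.39
# under pre-emption (means above), a raw difference-of-differences of about 0.80,
# in line with the posterior interaction estimate of 0.76.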
# the roughly predicted effect size from the previous study was 1.0; we use it as an estimate of the effect we expect here
Bf(0.24, 0.77, uniform = 0, meanoftheory = 0, sdtheory = 1.00, tail = 1)
## $LikelihoodTheory
## [1] 0.5856484
## 
## $Likelihoodnull
## [1] 0.009671967
## 
## $BayesFactor
## [1] 60.55112
H1RANGE = seq(0,4,by=0.01) # upper bound of 4 = [5-1]-[0]: the maximum effect of pre-emption minus no effect of entrenchment
range_test <- Bf_range(0.24, 0.77, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.09
print(high_threshold)
## [1] 4

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

# Figure 11
judgments_unattested_attested.df <- subset(combined_judgment_data.df, semantically_correct == "1")   
judgments_unattested_attested.df <- subset(judgments_unattested_attested.df, restricted_verb == "yes")   


aggregated.means1 = aggregate(response ~ condition + attested_unattested + participant_private_id, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)

aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")


yarrr::pirateplot(formula = response ~  attested  + condition,
                  data = aggregated.means1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested$response, attested_vs_unattested$attested_unattested, mean),3)
##     0     1 
## 2.259 4.881
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))


# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept              3.581521 0.05452928 3.475023 3.689603
## b_attested_unattested.ct 2.586747 0.11820191 2.348800 2.813750
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
# this prior is drawn from Experiment 1
Bf(0.12, 2.59, uniform = 0, meanoftheory = 0, sdtheory = 2.55, tail = 1)
## $LikelihoodTheory
## [1] 0.1868105
## 
## $Likelihoodnull
## [1] 2.321655e-101
## 
## $BayesFactor
## [1] 8.046437e+99
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.12, 2.59, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_ent$response, attested_vs_unattested_ent$attested_unattested, mean),3)
##     0     1 
## 4.356 4.925
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))

attested_unattested_entrenchment <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                           Estimate Est.Error      Q2.5     Q97.5
## b_Intercept              4.6464417 0.0756561 4.4984699 4.7969937
## b_attested_unattested.ct 0.5557444 0.1399405 0.2794383 0.8292043
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 0.000000e+00
## 2 8.333333e-05
# expect a difference of 0.38 from previous work
Bf(0.14, 0.56, uniform = 0, meanoftheory = 0, sdtheory = 0.38, tail = 1)
## $LikelihoodTheory
## [1] 0.7572747
## 
## $Likelihoodnull
## [1] 0.0009559302
## 
## $BayesFactor
## [1] 792.1862
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.14, 0.56, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.04
print(high_threshold)
## [1] 4

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
##                  0     1
## entrenchment 4.356 4.925
## preemption   2.259 4.881
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept", "b_attested_unattested.ct",  "b_condition.ct", "b_attested_unattested.ct:condition.ct"))
##                                        Estimate  Est.Error      Q2.5     Q97.5
## b_Intercept                            3.938357 0.04619842  3.848582  4.030027
## b_attested_unattested.ct               1.911489 0.09211651  1.729939  2.091841
## b_condition.ct                        -1.033815 0.08921448 -1.212806 -0.859508
## b_attested_unattested.ct:condition.ct  1.969025 0.17885215  1.614986  2.320154
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1                 0
## 2                 0
## 3                 0
## 4                 0
# the roughly predicted effect size from the previous study was 2.11

Bf(0.18, 1.97, uniform = 0, meanoftheory = 0, sdtheory = 2.11, tail = 1)
## $LikelihoodTheory
## [1] 0.2444349
## 
## $Likelihoodnull
## [1] 2.165476e-26
## 
## $BayesFactor
## [1] 1.128781e+25
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.18, 1.97, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

#Figure 12

data_long <- gather(preemption_production.df, det_type, produced, det1:none, factor_key=TRUE)

p = ggplot(data_long, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw()+
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "periphrastic causative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "periphrastic causative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p



#a. Are participants producing more attested than unattested dets? We compare the proportion of attested dets for the restricted verbs against chance (captured by the intercept).

production_preemption_attested_unattested.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_verb =="yes")

round(tapply(production_preemption_attested_unattested.df$attested_unattested, production_preemption_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.997         0.990
production_preemption_attested_unattested.df$verb_type_training2 <- factor(production_preemption_attested_unattested.df$verb_type_training2)

df_prod = lizCenter(production_preemption_attested_unattested.df , list("verb_type_training2"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_verb_type_training2.ct"))
##                            Estimate Est.Error      Q2.5     Q97.5
## b_Intercept               4.5959009 0.3988822  3.891276 5.4504003
## b_verb_type_training2.ct -0.2990382 0.6253039 -1.538893 0.9210146
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00000
## 2   0.31825
#same analysis without verb_type_training2

# maximally vague priors for the intercept
prod_attested_unattested_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5   Q97.5
## b_Intercept  4.44177 0.3745163 3.768263 5.23774
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
# We will now compare unattested productions for restricted vs. novel verbs.
# Do participants produce the unwitnessed form less for the restricted verbs than for the novel verb?

production_preemption_restricted_novel.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df<- subset(production_preemption_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we arbitrarily code all det1s as attested and all det2s as unattested

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)

round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.997         0.990         0.478
round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.478 0.993
production_preemption_restricted_novel.df$restricted_verb <- factor(production_preemption_restricted_novel.df$restricted_verb)
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_verb"))

# maximally vague priors for the predictors and the intercept
prod_unattested_novel_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_preemption_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                      Estimate Est.Error     Q2.5    Q97.5
## b_Intercept          3.058033 0.2675473 2.569894 3.605754
## b_restricted_verb.ct 3.866624 0.5333746 2.778589 4.885067
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0

Production data: Effect of statistical entrenchment

#Figure 13

# here, we want to see how often participants produce det1 (e.g. transitive-only) for an intransitive-only (det2) verb in the intransitive condition at test.
# in other words, do they produce the unattested det on a semantically correct trial?

data_long_e <- gather(entrenchment_production.df, det_type, produced, det1:none, factor_key=TRUE)

data_long_e$transitivity_test_scene2 <-recode(data_long_e$transitivity_test_scene2, "construction1" = "test: transitive causative","construction2" = "test: intransitive inchoative")


p = ggplot(data_long_e, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(panel.grid.major = element_blank()) +
  facet_grid("transitivity_test_scene2") +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "intransitive inchoative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "intransitive inchoative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p

#a. Are participants producing more attested than unattested dets? We compare the proportion of attested dets for the restricted verbs against chance (captured by the intercept).

production_entrenchment_attested_unattested.df  <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_verb =="yes")

#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.56
# and separately for each verb type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.571         0.549
production_entrenchment_attested_unattested.df$verb_type_training2 <- factor(production_entrenchment_attested_unattested.df$verb_type_training2)
df_prod_ent = lizCenter((production_entrenchment_attested_unattested.df), list("verb_type_training2"))  


# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept", "b_verb_type_training2.ct"))
##                             Estimate  Est.Error        Q2.5     Q97.5
## b_Intercept               0.24156269 0.08370465  0.07616017 0.4073533
## b_verb_type_training2.ct -0.08871167 0.16293018 -0.40596199 0.2284035
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##     c(C1, C2)
## 1 0.001833333
## 2 0.296666667
# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept"))
##              Estimate  Est.Error      Q2.5     Q97.5
## b_Intercept 0.2404281 0.08145402 0.0815077 0.3994481
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.002333333
# c. We now compare unattested productions for restricted vs. novel verbs.
# Do participants produce the unwitnessed form less for the two non-alternating verbs than for the novel verb (for which the "unwitnessed" form has to be set arbitrarily)?


production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we arbitrarily set all det1s as attested and all det2s as unattested

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)
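
The two ifelse() steps could equivalently be written as a single dplyr::case_when(), which keeps the arbitrary novel-verb coding in one place (a sketch, assuming attested_unattested is numeric, as the means below suggest):

# equivalent recoding in one step (a sketch)
production_entrenchment_restricted_novel.df <- production_entrenchment_restricted_novel.df %>%
  mutate(attested_unattested = case_when(
    verb_type_training2 == "novel" & det_lenient_adapted == "det_construction1" ~ 1,
    verb_type_training2 == "novel" & det_lenient_adapted == "det_construction2" ~ 0,
    TRUE ~ attested_unattested
  ))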

round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.571         0.549         0.516
# proportion of attested items for each verb type - we will now compare constr1/2 vs. novel


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.516 0.560
# in other words, participants produce *unattested forms* less often for the restricted verbs (1 - 0.560 = 0.440) than for the novel verb (1 - 0.516 = 0.484)

production_entrenchment_restricted_novel.df$restricted_verb <- factor(production_entrenchment_restricted_novel.df$restricted_verb)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_verb"))


# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                       Estimate Est.Error        Q2.5     Q97.5
## b_Intercept          0.1827177 0.0678415  0.05002724 0.3156595
## b_restricted_verb.ct 0.1738267 0.1410444 -0.09955456 0.4464903
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00425
## 2   0.10650

Experiment 3

Load data

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp3_production_data.csv")

combined_judgment_data.df <- read.csv("exp3_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)


#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
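
The four indicator columns follow one pattern, so they can also be built in a single pass (a sketch; det_codes is a hypothetical lookup vector, same 0/1 coding):

# same four 0/1 columns in one loop (a sketch)
det_codes <- c(det1 = "det_construction1", det2 = "det_construction2",
               other = "other", none = "none")
for (col in names(det_codes)) {
  entrenchment_production.df[[col]] <-
    as.integer(entrenchment_production.df$det_lenient_adapted == det_codes[[col]])
}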


entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)


#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct)  #actually, all NAs here
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)

# Create columns that we will need to run production analyses in pre-emption
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)


preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)

Preregistered data analyses

Question 1: Have participants picked up on the difference in meaning between the two argument-structure constructions?

Production data

#Figure 14

RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))
RQ1_graph_productions.df = subset(RQ1_graph_productions.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ verb_type_training2 + participant_private_id, RQ1_graph_productions.df, FUN=mean)

aggregated.graph1 <- rename(aggregated.graph1, verb = verb_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = correct  ~ verb,
                  data = aggregated.graph1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "% semantically correct",
                  cex.lab = 1,
                  cex.axis = 1,
                  cex.names = 1,
                  yaxt = "n")

axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)

#1 alternating verb production

alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "alternating")

#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, alternating_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.972
# average accuracy separately for causative and inchoative scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.988         0.956
a = lizCenter(alternating_prod.df, list("transitivity_test_scene2"))   

# maximally vague priors for the intercept and the predictors
alternating_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct" ))
##                                 Estimate Est.Error      Q2.5     Q97.5
## b_Intercept                    3.4899227 0.3403637  2.882208 4.2002169
## b_transitivity_test_scene2.ct -0.6574342 0.5583675 -1.764982 0.4217469
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1     0.000
## 2     0.117
# no difference between construction 1 and construction 2

# Final model
# maximally vague priors for the intercept 
alternating_model_final = brm(formula = semantically_correct~1 + (1|participant_private_id), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.356383 0.3203871 2.784352 4.055615
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel verb production

novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "novel")

#and filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + participant_private_id, novel_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.959
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2 
##         0.969         0.950
b = lizCenter(novel_prod.df, list("transitivity_test_scene2"))  


# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|participant_private_id), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct"))
##                                 Estimate Est.Error      Q2.5     Q97.5
## b_Intercept                    3.4383867 0.3837392  2.755946 4.2529884
## b_transitivity_test_scene2.ct -0.5916346 0.5824472 -1.786498 0.5103841
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1     0.000
## 2     0.849
# no difference between construction 1 and construction 2  
# Final model

# maximally vague priors for the intercept 
novel_model_final <- brm(formula = semantically_correct~1 + (1|participant_private_id), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.272273   0.36937 2.614955 4.051532
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0

Judgment data

#Figure 15
RQ1_graph_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))

# aggregated dataframe for means
aggregated.graph2 = aggregate(response ~ verb_type_training2+ semantically_correct + participant_private_id, RQ1_graph_judgments.df, FUN=mean)
aggregated.graph2$semantically_correct <- recode(aggregated.graph2$semantically_correct, "1" = "yes","0" = "no")

aggregated.graph2 <- rename(aggregated.graph2, verb = verb_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = response ~ correct + verb,
                  data = aggregated.graph2,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

alternating_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "alternating")

#1 alternating verb judgments

# aggregated dataframe for means
aggregated.means_alternating_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, alternating_judgments.df, FUN=mean)

# average rating for semantically correct vs. incorrect trials, across causative and noncausative trial types
round(tapply(aggregated.means_alternating_judgments$response, aggregated.means_alternating_judgments$semantically_correct, mean),3)
##     0     1 
## 2.350 4.862
# average rating separately for causative and noncausative scenes
round(tapply(aggregated.means_alternating_judgments$response, list(aggregated.means_alternating_judgments$semantically_correct, aggregated.means_alternating_judgments$transitivity_test_scene2), mean),3)
##   construction1 construction2
## 0         2.288         2.413
## 1         4.888         4.838
c = lizCenter(alternating_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  
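
As a sanity check on the centering, the derived .ct predictors should average approximately zero; a sketch:

# centered predictors should have mean ~0 (a sketch)
round(colMeans(c[, c("transitivity_test_scene2.ct", "semantically_correct.ct")]), 3)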

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct","b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                          Estimate  Est.Error
## b_Intercept                                            3.62077750 0.06598161
## b_transitivity_test_scene2.ct                          0.03823931 0.07411815
## b_semantically_correct.ct                              2.46958436 0.13153607
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.17401562 0.13693552
##                                                             Q2.5      Q97.5
## b_Intercept                                            3.4902242 3.75024945
## b_transitivity_test_scene2.ct                         -0.1072248 0.18256491
## b_semantically_correct.ct                              2.2086029 2.72396834
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.4472234 0.09789924
samps = as.matrix(as.mcmc(alternating_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.6960833
## 3         0.0000000
## 4         0.9024167
# no difference between construction 1 and construction 2

# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(alternating_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
##                           Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept               3.623346 0.06715506 3.491791 3.757007
## b_semantically_correct.ct 2.465308 0.13200070 2.199105 2.719949
mcmc_plot(alternating_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
#2 novel verb judgments

novel_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "novel")

# aggregated dataframe for means
aggregated.means_novel_judgments = aggregate(response ~ transitivity_test_scene2 + semantically_correct + participant_private_id, novel_judgments.df, FUN=mean)

# average rating for semantically correct vs. incorrect trials, across causative and noncausative trial types
round(tapply(aggregated.means_novel_judgments$response, aggregated.means_novel_judgments$semantically_correct, mean),3)
##     0     1 
## 2.169 3.825
# average rating separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_judgments$response, list(aggregated.means_novel_judgments$semantically_correct, aggregated.means_novel_judgments$transitivity_test_scene2), mean),3)
##   construction1 construction2
## 0         2.112         2.225
## 1         3.775         3.875
d = lizCenter(novel_judgments.df, list("transitivity_test_scene2", "semantically_correct"))  


# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments <-brm(formula = response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct","b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
##                                                           Estimate Est.Error
## b_Intercept                                            2.977580239 0.1380216
## b_transitivity_test_scene2.ct                          0.104547627 0.0759791
## b_semantically_correct.ct                              1.596302049 0.2001465
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.004881464 0.1637303
##                                                              Q2.5     Q97.5
## b_Intercept                                            2.70585375 3.2463714
## b_transitivity_test_scene2.ct                         -0.04379459 0.2544248
## b_semantically_correct.ct                              1.20450377 1.9953912
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.31794347 0.3236626
mcmc_plot(novel_model_judgments, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.9153333
## 3         0.0000000
## 4         0.5127500
# no difference between construction 1 and construction 2
# Final model

# maximally vague priors for the predictors (we don't interpret the intercept here) 
novel_model_judgments_final <-brm(formula = response~semantically_correct.ct + (1 + semantically_correct.ct|participant_private_id), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(novel_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
##                           Estimate Est.Error     Q2.5    Q97.5
## b_Intercept               2.972140 0.1373283 2.706112 3.242973
## b_semantically_correct.ct 1.587756 0.1977276 1.193219 1.969118
mcmc_plot(novel_model_judgments_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0

Question 2: Does statistical pre-emption constrain verb argument construction generalizations in adults (judgment data)?

#Figure 16

#first, filter out semantically incorrect trials

judgments_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

judgments_novel.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

judgment_unattested_constr1.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)
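
The three subsets plus rbind() can also be written as one logical filter; a sketch that selects the same rows (row order aside):

# same rows in a single subset (a sketch)
judgment_unattested_novel.df <- subset(
  judgments_unattested_novel.df,
  verb_type_training2 == "novel" |
    (verb_type_training2 %in% c("construction1", "construction2") &
       attested_unattested == "0")
)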

aggregated.means = aggregate(response ~ condition + restricted_verb + participant_private_id, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_verb)

yarrr::pirateplot(formula = response ~ restricted + condition,
                  data = aggregated.means,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

judgment_unattested_novel_preemption.df <- subset(judgment_unattested_novel.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, judgment_unattested_novel_preemption.df$restricted_verb, mean),3)
##    no   yes 
## 2.722 2.423
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(judgments_preemption_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept           2.5723722 0.07410429  2.4263015 2.71715114
## b_restricted_verb.ct -0.2865003 0.17688092 -0.6334017 0.05661316
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9486667
# BF analyses: we use the attested vs. unattested difference in Experiment 1 (SD = 0.65) as an estimate of the difference we expect when comparing ratings for unattested vs. novel constructions
Bf(0.18, 0.29, uniform = 0, meanoftheory = 0, sdtheory = 0.65, tail = 1)
## $LikelihoodTheory
## [1] 1.012346
## 
## $Likelihoodnull
## [1] 0.6053312
## 
## $BayesFactor
## [1] 1.672383
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.18, 0.29, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# RRs for which BF > 3
#ev_for_h1 <- subset(data.frame(range_test), BF > 3)
#low_threshold <- min(ev_for_h1$sdtheory)
#high_threshold <- max(ev_for_h1$sdtheory)
#print(low_threshold)
#print(high_threshold)
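
The threshold extraction is left commented out above, presumably because no sdtheory value in H1RANGE reaches BF > 3 here; min()/max() on the empty subset would return Inf/-Inf with warnings. A guarded version (a sketch, using the sdtheory and BF columns that Bf_range() returns):

# guarded threshold extraction (a sketch)
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
if (nrow(ev_for_h1) > 0) {
  print(range(ev_for_h1$sdtheory))
} else {
  message("no sdtheory values in H1RANGE give BF > 3")
}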

Question 3: Does statistical entrenchment constrain verb argument construction generalizations in adults (judgment data)?

#first, filter out semantically incorrect trials

entrenchment_judgment_unattested_novel.df <- subset(entrenchment_judgment.df, semantically_correct == "1")   

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_verb <- factor(entrenchment_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, entrenchment_judgment_unattested_novel.df$restricted_verb, mean),3)
##   yes    no 
## 4.425 3.825
#Center variables of interest using the lizCenter function:
d_unattested_novel_entrenchment = lizCenter(entrenchment_judgment_unattested_novel.df, list("restricted_verb"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct, data=d_unattested_novel_entrenchment, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
##                        Estimate Est.Error       Q2.5      Q97.5
## b_Intercept           4.1393471 0.1454639  3.8557792  4.4239224
## b_restricted_verb.ct -0.5750232 0.1842304 -0.9341807 -0.2109194
mcmc_plot(judgments_entrenchment_model, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9988333
# the maximum is based on the attested vs. unattested difference in this experiment (which gave clear evidence for entrenchment)
Bf(0.19, -0.58, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 0.004503071
## 
## $Likelihoodnull
## [1] 0.01989102
## 
## $BayesFactor
## [1] 0.2263872
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.19, -0.58, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF < 1/3 (evidence for the null)
ev_for_h0 <- subset(data.frame(range_test), BF < 1/3)
low_threshold <- min(ev_for_h0$sdtheory)
high_threshold <- max(ev_for_h0$sdtheory)
print(low_threshold)
## [1] 0
print(high_threshold)
## [1] 4

Question 4: Is the effect of statistical pre-emption larger than that of entrenchment (judgment data)?

#first, filter out semantically incorrect trials

all_judgment_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

all_judgment_novel.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_verb <- factor(all_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_verb, all_judgment_unattested_novel.df$condition), mean),3)
##     entrenchment preemption
## yes        4.425      2.423
## no         3.825      2.722
# pre-emption shows the predicted effect (lower ratings for restricted than novel); entrenchment shows the opposite pattern

#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_verb", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_verb.ct|participant_private_id)+restricted_verb.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_verb.ct","b_condition.ct", "b_restricted_verb.ct:condition.ct"))
##                                      Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept                        3.03198441 0.07724026  2.8814480  3.1837885
## b_restricted_verb.ct               0.03902691 0.13218226 -0.2209462  0.2998855
## b_condition.ct                    -1.52183619 0.14914237 -1.8129077 -1.2236399
## b_restricted_verb.ct:condition.ct  0.86090224 0.25396801  0.3577221  1.3588909
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))

C1=mean(samps[,"b_Intercept"] < 0) 
C2=mean(samps[,"b_restricted_verb.ct"] > 0)
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_restricted_verb.ct:condition.ct"] > 0) 


pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.6115833
## 3         0.0000000
## 4         0.9992500
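
The interaction can also be unpacked into per-condition slopes directly from the posterior draws. A sketch, assuming lizCenter() centers the condition dummy to roughly +/-0.5 for balanced data (check unique(df$condition.ct) to confirm which condition maps to which sign):

# per-condition effect of restricted_verb.ct, computed from the draws (a sketch)
cc <- sort(unique(df$condition.ct))  # the two centered condition values
slope_c1 <- samps[, "b_restricted_verb.ct"] + cc[1] * samps[, "b_restricted_verb.ct:condition.ct"]
slope_c2 <- samps[, "b_restricted_verb.ct"] + cc[2] * samps[, "b_restricted_verb.ct:condition.ct"]
round(c(mean(slope_c1), mean(slope_c2)), 3)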
# the roughly predicted effect size from the previous study was 1.0; we use it as an estimate of the effect we expect here
Bf(0.25, 0.85, uniform = 0, meanoftheory = 0, sdtheory = 1.00, tail = 1)
## $LikelihoodTheory
## [1] 0.5506764
## 
## $Likelihoodnull
## [1] 0.004928877
## 
## $BayesFactor
## [1] 111.7245
H1RANGE = seq(0,4,by=0.01) # upper bound [5-1]-[0] = 4: the maximum effect of pre-emption minus no effect of entrenchment
range_test <- Bf_range(0.25, 0.85, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3 (evidence for H1)
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.09
print(high_threshold)
## [1] 4

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

# Figure 17
judgments_unattested_attested.df <- subset(combined_judgment_data.df, semantically_correct == "1")   
judgments_unattested_attested.df <- subset(judgments_unattested_attested.df, restricted_verb == "yes")   


aggregated.means1 = aggregate(response ~ condition + attested_unattested + participant_private_id, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)

aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")


yarrr::pirateplot(formula = response ~  attested  + condition,
                  data = aggregated.means1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested$response, attested_vs_unattested$attested_unattested, mean),3)
##     0     1 
## 2.423 4.908
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))


# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept              3.683332 0.06046416 3.564581 3.804082
## b_attested_unattested.ct 2.447635 0.11069088 2.226500 2.663440
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
# this prior is drawn from Experiment 1
Bf(0.11, 2.45, uniform = 0, meanoftheory = 0, sdtheory = 2.55, tail = 1)
## $LikelihoodTheory
## [1] 0.1972052
## 
## $Likelihoodnull
## [1] 6.891828e-108
## 
## $BayesFactor
## [1] 2.861436e+106
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.11, 2.45, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3 (evidence for H1)
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_ent$response, attested_vs_unattested_ent$attested_unattested, mean),3)
##     0     1 
## 4.425 4.950
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))

attested_unattested_entrenchment <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error      Q2.5     Q97.5
## b_Intercept              4.690573 0.06702145 4.5569003 4.8196045
## b_attested_unattested.ct 0.516202 0.13087031 0.2622447 0.7723229
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 0.000000e+00
## 2 8.333333e-05
# expect a difference of 0.38 from previous work
Bf(0.13, 0.51, uniform = 0, meanoftheory = 0, sdtheory = 0.38, tail = 1)
## $LikelihoodTheory
## [1] 0.887002
## 
## $Likelihoodnull
## [1] 0.001396224
## 
## $BayesFactor
## [1] 635.2864
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.13, 0.51, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3 (evidence for H1)
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.04
print(high_threshold)
## [1] 4

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_verb == "yes" & semantically_correct == "1")

round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
##                  0     1
## entrenchment 4.425 4.950
## preemption   2.423 4.908
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept", "b_attested_unattested.ct",  "b_condition.ct", "b_attested_unattested.ct:condition.ct"))
##                                         Estimate  Est.Error      Q2.5
## b_Intercept                            3.9721848 0.04614902  3.882201
## b_attested_unattested.ct               1.8952621 0.08608420  1.723739
## b_condition.ct                        -0.9889057 0.09091672 -1.166782
## b_attested_unattested.ct:condition.ct  1.8871192 0.17065879  1.547228
##                                            Q97.5
## b_Intercept                            4.0631182
## b_attested_unattested.ct               2.0638373
## b_condition.ct                        -0.8068928
## b_attested_unattested.ct:condition.ct  2.2247271
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1                 0
## 2                 0
## 3                 0
## 4                 0
# the roughly predicted effect size from the previous study was 2.11

Bf(0.17, 1.89, uniform = 0, meanoftheory = 0, sdtheory = 2.11, tail = 1)
## $LikelihoodTheory
## [1] 0.2530173
## 
## $Likelihoodnull
## [1] 3.393215e-27
## 
## $BayesFactor
## [1] 7.456566e+25
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 1.89, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3 (evidence for H1)
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

#Figure 18

data_long <- gather(preemption_production.df, det_type, produced, det1:none, factor_key=TRUE)

p = ggplot(data_long, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw()+
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "periphrastic causative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "periphrastic causative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p


#a. Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (the intercept) for the restricted verbs against chance.

production_preemption_attested_unattested.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_verb =="yes")

round(tapply(production_preemption_attested_unattested.df$attested_unattested, production_preemption_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.992         0.997
production_preemption_attested_unattested.df$verb_type_training2 <- factor(production_preemption_attested_unattested.df$verb_type_training2)

df_prod = lizCenter(production_preemption_attested_unattested.df , list("verb_type_training2"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_verb_type_training2.ct"))
##                           Estimate Est.Error       Q2.5    Q97.5
## b_Intercept              4.7645589 0.3799911  4.0847166 5.562708
## b_verb_type_training2.ct 0.3277374 0.5918335 -0.7997574 1.506531
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00000
## 2   0.70975
#same analysis without verb_type_training2

# maximally vague priors for the intercept
prod_attested_unattested_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 4.669262 0.3685544 4.020965 5.464516
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
# We now compare unattested productions for restricted vs. novel verbs.
# Do participants produce the unwitnessed form less for the restricted verbs than for the novel verb?

production_preemption_restricted_novel.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df<- subset(production_preemption_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we arbitrarily set all det1s as attested and all det2s as unattested

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)

round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.992         0.997         0.565
round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.565 0.995
production_preemption_restricted_novel.df$restricted_verb <- factor(production_preemption_restricted_novel.df$restricted_verb)
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_verb"))

# maximally vague priors for the predictors and the intercept
prod_unattested_novel_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_preemption_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                      Estimate Est.Error     Q2.5    Q97.5
## b_Intercept          3.442855 0.2733062 2.938702 4.003866
## b_restricted_verb.ct 3.495244 0.5413729 2.400233 4.539912
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0

Production data: Effect of statistical entrenchment

#Figure 19

# Here, we want to see how often participants produce det1 (e.g. the transitive-only particle) for a det2 (intransitive-only) verb in the intransitive condition at test.
# In other words, do they produce the unattested form on a semantically correct trial?

data_long_e <- gather(entrenchment_production.df, det_type, produced, det1:none, factor_key=TRUE)

data_long_e$transitivity_test_scene2 <-recode(data_long_e$transitivity_test_scene2, "construction1" = "test: transitive causative","construction2" = "test: intransitive inchoative")


p = ggplot(data_long_e, aes(x = verb_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(panel.grid.major = element_blank()) +
  facet_grid("transitivity_test_scene2") +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "intransitive inchoative", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "intransitive inchoative", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("verb type at training")
p

#a. Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (the intercept) for the restricted verbs against chance.

production_entrenchment_attested_unattested.df  <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_verb =="yes")

#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.572
# and separately for each verb type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2 
##         0.588         0.556
production_entrenchment_attested_unattested.df$verb_type_training2 <- factor(production_entrenchment_attested_unattested.df$verb_type_training2)
df_prod_ent = lizCenter(production_entrenchment_attested_unattested.df, list("verb_type_training2"))


# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept", "b_verb_type_training2.ct"))
##                            Estimate  Est.Error       Q2.5     Q97.5
## b_Intercept               0.2956674 0.09498848  0.1084311 0.4853495
## b_verb_type_training2.ct -0.1274236 0.16454403 -0.4534988 0.1879274
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##     c(C1, C2)
## 1 0.001166667
## 2 0.225916667
# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept"))
##              Estimate Est.Error      Q2.5     Q97.5
## b_Intercept 0.2944747 0.0925517 0.1147689 0.4802477
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.001166667
# c. We now compare unattested productions for restricted vs. novel verbs.
# Do participants produce the unwitnessed form less for the two non-alternating verbs than for the novel verb (for which the "unwitnessed" form has to be set arbitrarily)?


production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, verb_type_training2 != "alternating")

# all forms are unwitnessed for the novel verb, so we arbitrarily set all det1s as attested and all det2s as unattested

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)

round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2         novel 
##         0.588         0.556         0.511
# proportion of attested items for each verb type - we will now compare constr1/2 vs. novel


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_verb, mean),3)
##    no   yes 
## 0.511 0.572
# in other words, participants produce *unattested forms* less often for the restricted verbs (1 - 0.572 = 0.428) than for the novel verb (1 - 0.511 = 0.489)

production_entrenchment_restricted_novel.df$restricted_verb <- factor(production_entrenchment_restricted_novel.df$restricted_verb)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_verb"))


# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|participant_private_id), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_verb.ct"))
##                       Estimate  Est.Error        Q2.5     Q97.5
## b_Intercept          0.2097302 0.06854349  0.07488709 0.3442831
## b_restricted_verb.ct 0.2422715 0.14762979 -0.04670998 0.5346281
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##    c(C1, C2)
## 1 0.00100000
## 2 0.04883333

Experiment 4

Load data

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp4_production_data.csv")

combined_judgment_data.df <- read.csv("exp4_judgment_data.csv")
combined_judgment_data.df$restricted_noun <- factor(combined_judgment_data.df$restricted_noun)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)


#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$noun_type_test2 <- factor(entrenchment_production.df$noun_type_test2)

# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)


entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$noun_type_test2 <- factor(entrenchment_judgment.df$noun_type_test2)
entrenchment_judgment.df$restricted_noun <- factor(entrenchment_judgment.df$restricted_noun)


#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$noun_type_test2 <- factor(preemption_production.df$noun_type_test2)

# Create columns that we will need to run production analyses in pre-emption
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced

preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)


preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$noun_type_test2 <- factor(preemption_judgment.df$noun_type_test2)
preemption_judgment.df$restricted_noun <- factor(preemption_judgment.df$restricted_noun)

Preregistered data analyses

Question 1: Have participants picked up on the difference in meaning between singular and plural marking?

Production data

#Figure 21
RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (noun_type_training2 == "alternating" | noun_type_training2 == "novel"))
RQ1_graph_productions.df = subset(RQ1_graph_productions.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ noun_type_training2 + participant_private_id, RQ1_graph_productions.df, FUN=mean)

aggregated.graph1 <- rename(aggregated.graph1, noun = noun_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = correct  ~ noun,
                  data = aggregated.graph1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "% semantically correct",
                  cex.lab = 1,
                  cex.axis = 1,
                  cex.names = 1,
                  yaxt = "n")

axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)

#1 alternating noun production

alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & noun_type_training2 == "alternating")
#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ noun_type_test2 + participant_private_id, alternating_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.977
# average accuracy separately for singular and plural test scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$noun_type_test2, mean),3)
## construction1 construction2 
##         0.994         0.959
a = lizCenter(alternating_prod.df, list("noun_type_test2"))  


# maximally vague priors for the intercept and the predictors
alternating_model <-brm(formula = semantically_correct~noun_type_test2.ct + (1 + noun_type_test2.ct|participant_private_id), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model, variable = c("b_Intercept", "b_noun_type_test2.ct"))
##                        Estimate Est.Error      Q2.5     Q97.5
## b_Intercept           4.0213097 0.4627455  3.187580 4.9875816
## b_noun_type_test2.ct -0.4352537 0.6843825 -1.753951 0.9605712
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_test2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.7461667
# no difference between construction 1 and construction 2  
# Final model

# maximally vague priors for the intercept 
alternating_model_final = brm(formula = semantically_correct~1 + (1|participant_private_id), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.705004 0.4254687 2.965602 4.603203
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel noun production

novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & noun_type_training2 == "novel")
#and filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ noun_type_test2 + participant_private_id, novel_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.939
# average accuracy separately for singular and plural test scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$noun_type_test2, mean),3)
## construction1 construction2 
##         0.954         0.924
b = lizCenter(novel_prod.df, list("noun_type_test2"))  

# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~noun_type_test2.ct + (1 + noun_type_test2.ct|participant_private_id), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model, variable = c("b_Intercept", "b_noun_type_test2.ct"))
##                        Estimate Est.Error      Q2.5     Q97.5
## b_Intercept           3.6411361 0.5132361  2.687886 4.7089462
## b_noun_type_test2.ct -0.3758622 0.5959546 -1.523891 0.8289487
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_test2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.7421667
# no difference between singulars and plurals  
# Final model   

# maximally vague priors for the intercept 
novel_model_final <- brm(formula = semantically_correct~1+ (1|participant_private_id), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error    Q2.5    Q97.5
## b_Intercept 3.536751  0.490951 2.61099 4.531144
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0

Judgment data

There are no semantically incorrect trials in Experiment 4; these analyses are therefore not possible.

Question 2: Does statistical preemption constrain morphological generalizations in adults (judgment data)?

#Figure 22

#no semantically incorrect trials here

#we only want to keep novel

judgments_novel.df <- subset(combined_judgment_data.df, noun_type_training2 == "novel")   

#and restricted items

judgment_unattested_constr1.df <- subset(preemption_judgment.df, noun_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(preemption_judgment.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)
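# (restricted nouns contribute only their unattested trials; for the novel nouns
# every form is unattested, so all their trials are kept)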

aggregated.means = aggregate(response ~ condition + restricted_noun + participant_private_id, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_noun)

yarrr::pirateplot(formula = response ~ restricted + condition,
                  data = aggregated.means,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

judgment_unattested_novel_preemption.df <- subset(judgment_unattested_novel.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, judgment_unattested_novel_preemption.df$restricted_noun, mean),3)
##    no   yes 
## 3.475 2.717
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_noun"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_preemption_model, variable = c("b_Intercept","b_restricted_noun.ct"))
##                        Estimate Est.Error     Q2.5      Q97.5
## b_Intercept           3.1032061 0.1213437  2.87143  3.3507982
## b_restricted_noun.ct -0.7216728 0.2187184 -1.15276 -0.2848259
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9990833
# BF analyses: we use the difference between attested and novel in Experiment 1 (SD = 0.65) as an estimate of the difference we expect here
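# (For reference: the first argument to Bf, 0.22, is the posterior SE of b_restricted_noun.ct
# from the model above, and the second, 0.72, is the absolute posterior estimate; sdtheory is
# the SD of a half-normal model of H1, and tail = 1 makes the test one-tailed. This is our
# reading of the sourced BF.R helper.)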

Bf(0.22, 0.72, uniform = 0, meanoftheory = 0, sdtheory = 0.65, tail = 1)
## $LikelihoodTheory
## [1] 0.6698739
## 
## $Likelihoodnull
## [1] 0.008564045
## 
## $BayesFactor
## [1] 78.21934
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.22, 0.72, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
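# (Bf_range, from the sourced Bf_range.R, recomputes the Bayes factor for each candidate
# prior SD in H1RANGE, i.e. a robustness-region analysis over plausible H1 scales.)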

# robustness region: prior SD values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.08
print(high_threshold)
## [1] 4

Question 3: Does statistical entrenchment constrain morphological generalizations in adults (judgment data)?

#no semantically incorrect trials here

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment.df, noun_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment.df, noun_type_training2 == "construction1" & attested_unattested == "0")

entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

#bind new dataframe
entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_noun <- factor(entrenchment_judgment_unattested_novel.df$restricted_noun , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, entrenchment_judgment_unattested_novel.df$restricted_noun, mean),3)
##   yes    no 
## 4.537 4.646
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(entrenchment_judgment_unattested_novel.df, list("restricted_noun"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept","b_restricted_noun.ct"))
##                       Estimate  Est.Error       Q2.5     Q97.5
## b_Intercept          4.5940398 0.09439947  4.4111242 4.7774082
## b_restricted_noun.ct 0.1028692 0.14163914 -0.1722345 0.3819393
mcmc_plot(judgments_entrenchment_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.2286667
# use the unattested vs. novel difference from the original study as an estimate of the unattested vs. novel difference expected here
Bf(0.14, 0.11, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 2.237763
## 
## $Likelihoodnull
## [1] 2.092796
## 
## $BayesFactor
## [1] 1.06927
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.14, 0.11, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)


# find values for which BF is inconclusive 
ev_inconclusive <- subset(data.frame(range_test), BF < 3 & BF > 1/3)
low_threshold <- min(ev_inconclusive$sdtheory)
high_threshold <- max(ev_inconclusive$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 0.88
# find out how many more participants we would need for conclusive evidence for entrenchment (BF > 3)
invisible(Bf_powercalc(0.14,  0.11, uniform=0, meanoftheory=0, sdtheory=0.38/2, tail=1, N=40, min=30, max=400))
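# (Bf_powercalc, from the sourced Bf_powercalc.R, appears to rescale the standard error
# by sqrt(N/n) for candidate sample sizes n between min and max and to report where the
# resulting BF first exceeds 3, hence the N quoted below.)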

#N = 238

Question 4: Is the effect of statistical pre-emption larger than that of entrenchment (judgment data)?

#all are semantically correct trials

#we only want to keep novel

all_judgment_novel.df <- subset(combined_judgment_data.df, noun_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(combined_judgment_data.df, noun_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(combined_judgment_data.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_noun <- factor(all_judgment_unattested_novel.df$restricted_noun , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_noun, all_judgment_unattested_novel.df$condition), mean),3)
##     entrenchment preemption
## yes        4.537      2.717
## no         4.646      3.475
round(tapply(all_judgment_unattested_novel.df$response, all_judgment_unattested_novel.df$condition, mean),3)
## entrenchment   preemption 
##        4.592        3.096
#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_noun", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_noun.ct","b_condition.ct", "b_restricted_noun.ct:condition.ct"))
##                                     Estimate  Est.Error        Q2.5      Q97.5
## b_Intercept                        3.8460940 0.07570185  3.69583699  3.9935034
## b_restricted_noun.ct               0.4236168 0.13033544  0.16765618  0.6801147
## b_condition.ct                    -1.4501211 0.14914868 -1.74252113 -1.1575872
## b_restricted_noun.ct:condition.ct  0.5813617 0.24874526  0.09089065  1.0654415
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)
C3=mean(samps[,"b_condition.ct"] > 0)
C4=mean(samps[,"b_restricted_noun.ct:condition.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1        0.00000000
## 2        0.00150000
## 3        0.00000000
## 4        0.01016667
#roughly predicted effect size from previous study was 1.0. Use it as an estimate of the effect we expect here
Bf(0.25, 0.57, uniform = 0, meanoftheory = 0, sdtheory = 1.00, tail = 1)
## $LikelihoodTheory
## [1] 0.6551184
## 
## $Likelihoodnull
## [1] 0.1186183
## 
## $BayesFactor
## [1] 5.52291
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.25, 0.57, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.15
print(high_threshold)
## [1] 2.12

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

#Figure 23

judgments_unattested_attested.df <- subset(preemption_judgment.df, restricted_noun == "yes")   


aggregated.means1 = aggregate(response ~ condition + attested_unattested + participant_private_id, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)

aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")


yarrr::pirateplot(formula = response ~  attested  + condition,
                  data = aggregated.means1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested$response, attested_vs_unattested$attested_unattested, mean),3)
##     0     1 
## 2.717 4.375
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error      Q2.5    Q97.5
## b_Intercept              3.558943 0.07675638 3.4092887 3.712121
## b_attested_unattested.ct 1.464976 0.33103724 0.8023527 2.116254
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 0.0000000000
## 2 0.0001666667
# prior from previous study with adults: 2.55
Bf(0.33, 1.49, uniform = 0, meanoftheory = 0, sdtheory = 2.55, tail = 1)
## $LikelihoodTheory
## [1] 0.2623458
## 
## $Likelihoodnull
## [1] 4.523803e-05
## 
## $BayesFactor
## [1] 5799.231
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.33, 1.49, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.09
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested_ent$response, attested_vs_unattested_ent$attested_unattested, mean),3)
##     0     1 
## 4.537 4.850
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                           Estimate  Est.Error       Q2.5     Q97.5
## b_Intercept              4.6950609 0.08545271 4.52512198 4.8608869
## b_attested_unattested.ct 0.3079102 0.14030036 0.03030863 0.5855303
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00000
## 2   0.01525
# expect a difference of 0.38 from previous work
Bf(0.14, 0.31, uniform = 0, meanoftheory = 0, sdtheory = 0.38, tail = 1)
## $LikelihoodTheory
## [1] 1.442615
## 
## $Likelihoodnull
## [1] 0.2455251
## 
## $BayesFactor
## [1] 5.875631
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.14, 0.31, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.09
print(high_threshold)
## [1] 1.01

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
##                  0     1
## entrenchment 4.537 4.850
## preemption   2.717 4.375
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept", "b_attested_unattested.ct", "b_condition.ct","b_attested_unattested.ct:condition.ct"))
##                                         Estimate  Est.Error       Q2.5
## b_Intercept                            4.1255081 0.05720107  4.0155303
## b_attested_unattested.ct               0.9507919 0.17586057  0.6145886
## b_condition.ct                        -1.1185757 0.11345058 -1.3425308
## b_attested_unattested.ct:condition.ct  1.1670502 0.34823427  0.4863870
##                                            Q97.5
## b_Intercept                            4.2394151
## b_attested_unattested.ct               1.3001521
## b_condition.ct                        -0.8915981
## b_attested_unattested.ct:condition.ct  1.8368703
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1             0e+00
## 2             0e+00
## 3             0e+00
## 4             5e-04
#roughly predicted effect size from previous study was 2.11
Bf(0.34, 1.18, uniform = 0, meanoftheory = 0, sdtheory = 2.11, tail = 1)
## $LikelihoodTheory
## [1] 0.3204584
## 
## $Likelihoodnull
## [1] 0.002843783
## 
## $BayesFactor
## [1] 112.6874
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.34, 1.18, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.12
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

#Figure 24
data_long <- gather(preemption_production.df, det_type, produced, det1:none, factor_key=TRUE)
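# gather() is superseded in current tidyr; a roughly equivalent call with the newer
# pivot_longer() API would be (a sketch, not run here; note that factor level order
# may differ from factor_key = TRUE):
# data_long <- pivot_longer(preemption_production.df, cols = det1:none,
#                           names_to = "det_type", values_to = "produced",
#                           names_transform = list(det_type = as.factor))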

p = ggplot(data_long, aes(x = noun_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw()+
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("plural 1", "plural 2", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "plural 1", "construction2" = "plural 2", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("noun type at training")
p

#Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (that's the intercept) for the restricted nouns against chance

production_preemption_attested_unattested.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_noun =="yes")

round(tapply(production_preemption_attested_unattested.df$attested_unattested, production_preemption_attested_unattested.df$noun_type_training2, mean),3)
## construction1 construction2 
##         0.915         0.823
production_preemption_attested_unattested.df$noun_type_training2 <- factor(production_preemption_attested_unattested.df$noun_type_training2)

df_prod = lizCenter(production_preemption_attested_unattested.df , list("noun_type_training2"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~noun_type_training2.ct + (1 + noun_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_noun_type_training2.ct"))
##                            Estimate Est.Error      Q2.5     Q97.5
## b_Intercept               3.0806615 0.6659365  1.695316 4.3026409
## b_noun_type_training2.ct -0.7817438 0.6058027 -1.905419 0.4777995
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.1021667
# maximally vague priors for the intercept
prod_attested_unattested_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept  2.95941 0.6271663 1.690026 4.142458
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.0001666667
# We will now compare unattested productions for restricted vs. novel nouns
# Do participants produce the unwitnessed form less for the restricted nouns than for the novel noun?

production_preemption_restricted_novel.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df<- subset(production_preemption_restricted_novel.df, noun_type_training2 != "alternating")

# all forms are unwitnessed for the novel noun, so we arbitrarily set all det1s as attested and all det2s as unattested

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)
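# (with this arbitrary coding the novel nouns should sit near 0.5 by construction,
# which is what the ~0.497 mean below reflects; the comparison of interest is the
# restricted nouns against that baseline)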

round(tapply(production_preemption_restricted_novel.df$attested_unattested, production_preemption_restricted_novel.df$noun_type_training2, mean),3)
## construction1 construction2         novel 
##         0.915         0.823         0.497
round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$restricted_noun, mean),3)
##    no   yes 
## 0.497 0.869
production_preemption_restricted_novel.df$restricted_noun <- factor(production_preemption_restricted_novel.df$restricted_noun)
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_noun"))

# maximally vague priors for the predictors and the intercept
prod_unattested_novel_final = brm(formula = attested_unattested ~restricted_noun.ct + (1 + restricted_noun.ct|participant_private_id), data=production_preemption_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_noun.ct"))
##                      Estimate Est.Error      Q2.5    Q97.5
## b_Intercept          2.063012 0.4340066 1.1975807 2.905823
## b_restricted_noun.ct 1.941087 0.7486716 0.4104456 3.367021
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##     c(C1, C2)
## 1 0.000000000
## 2 0.007333333

Production data: Effect of statistical entrenchment

#Figure 25

data_long_e <- gather(entrenchment_production.df, det_type, produced, det1:none, factor_key=TRUE)

data_long_e$noun_type_test2 <-recode(data_long_e$noun_type_test2, "construction1" = "test: singular","construction2" = "test: plural")


p = ggplot(data_long_e, aes(x = noun_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(panel.grid.major = element_blank()) +
  facet_grid("noun_type_test2") +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("singular", "plural", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "singular", "construction2" = "plural", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("noun type at training")
p

#a. Are participants producing more attested than unattested dets?
# here, we want to see how often participants produce the unattested det, e.g. the singular-only det1 for a det2 (plural-only) noun in a singular scene at test
# and vice versa

production_entrenchment_attested_unattested.df  <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_noun =="yes")

#We want to compare attested vs. unattested trials for singular-only nouns in the plural construction at test
production_entrenchment_attested_unattested1.df  <- subset(production_entrenchment_attested_unattested.df, noun_type_training2 == "construction1" & noun_type_test2 == "construction2")

#And plural-only nouns in the singular construction at test. Filter out irrelevant trials
production_entrenchment_attested_unattested2.df  <- subset(production_entrenchment_attested_unattested.df, noun_type_training2 == "construction2" & noun_type_test2 == "construction1")


production_entrenchment_attested_unattested.df <- rbind(production_entrenchment_attested_unattested1.df, production_entrenchment_attested_unattested2.df)

#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.123
# and separately for each noun type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$noun_type_training2, mean),3)
## construction1 construction2 
##         0.137         0.109
production_entrenchment_attested_unattested.df$noun_type_training2 <- factor(production_entrenchment_attested_unattested.df$noun_type_training2)
df_prod_ent = lizCenter(production_entrenchment_attested_unattested.df, list("noun_type_training2"))


# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~noun_type_training2.ct + (1 + noun_type_training2.ct|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept","b_noun_type_training2.ct"))
##                            Estimate Est.Error      Q2.5     Q97.5
## b_Intercept              -3.1110588 0.8013145 -4.539751 -1.332528
## b_noun_type_training2.ct -0.3641648 0.7036910 -1.770832  1.026198
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_training2.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.99850
## 2   0.29725
#same analysis without noun_type_training2


# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept"))
##              Estimate Est.Error    Q2.5     Q97.5
## b_Intercept -3.170556 0.6442458 -4.4108 -1.854339
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 1
# c. we will now compare unattested productions for restricted vs. novel nouns
# Do participants produce the unwitnessed form less for the 2 non-alternating nouns than for the novel noun? (The “unwitnessed” form has to be set arbitrarily for the novel noun here.)


production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, noun_type_training2 != "alternating")

# all forms are unwitnessed for the novel noun, so we arbitrarily set all det1s as attested and all det2s as unattested

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)

# select trials featuring the novel noun in plural scenes
production_entrenchment_restricted_novel1.df <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "novel"  & noun_type_test2 == "construction2")


# Select trials featuring singular-only nouns in plural scenes
production_entrenchment_restricted_novel2.df  <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "construction1" & noun_type_test2 == "construction2")

# Select trials featuring plural-only nouns in singular scenes
production_entrenchment_restricted_novel3.df  <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "construction2" & noun_type_test2 == "construction1")


production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$noun_type_training2, mean),3)
## construction1 construction2         novel 
##         0.137         0.109         0.077
# reverse coding to focus on unattested rather than attested for novel vs. restricted
production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)
production_entrenchment_restricted_novel.df$attested_unattested<- recode(production_entrenchment_restricted_novel.df$attested_unattested, `1` = 0L, `0` = 1L)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_noun, mean),3)
##    no   yes 
## 0.923 0.877
#i.e. participants produce *unattested forms* less often for the restricted nouns than for the novel noun

production_entrenchment_restricted_novel.df$restricted_noun <- factor(production_entrenchment_restricted_novel.df$restricted_noun)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_noun"))


# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_noun.ct + (1 + restricted_noun.ct|participant_private_id), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_noun.ct"))
##                        Estimate Est.Error      Q2.5     Q97.5
## b_Intercept           3.4133469 0.6633214  2.029119 4.6746109
## b_restricted_noun.ct -0.3175871 0.6621008 -1.641085 0.9995103
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 8.333333e-05
## 2 3.117500e-01

Experiment 5

Load data

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp5_production_data.csv")

combined_judgment_data.df <- read.csv("exp5_judgment_data.csv")
combined_judgment_data.df$restricted_noun <- factor(combined_judgment_data.df$restricted_noun)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)


#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$noun_type_test2 <- factor(entrenchment_production.df$noun_type_test2)

# We first create columns coding whether each of det1, det2, 'other', and 'none' was produced; we then check proportions for each type of noun

entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)

entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$restricted_noun <- factor(entrenchment_judgment.df$restricted_noun)
entrenchment_judgment.df$noun_type_test2 <- factor(entrenchment_judgment.df$noun_type_test2)

#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct)
preemption_production.df$noun_type_test2 <- factor(preemption_production.df$noun_type_test2)

# We want to create columns coding whether each of det1, det2, 'other', and 'none' was produced

preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)


preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$restricted_noun <- factor(preemption_judgment.df$restricted_noun)
preemption_judgment.df$noun_type_test2 <- factor(preemption_judgment.df$noun_type_test2)

Preregistered data analyses

Question 1: Have participants picked up on the difference in meaning between singular and plural marking?

Production data

#Figure 26
RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (noun_type_training2 == "alternating" | noun_type_training2 == "novel"))
RQ1_graph_productions.df = subset(RQ1_graph_productions.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ noun_type_training2 + participant_private_id, RQ1_graph_productions.df, FUN=mean)

aggregated.graph1 <- rename(aggregated.graph1, noun = noun_type_training2,
                            correct = semantically_correct)

yarrr::pirateplot(formula = correct  ~ noun,
                  data = aggregated.graph1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "% semantically correct",
                  cex.lab = 1,
                  cex.axis = 1,
                  cex.names = 1,
                  yaxt = "n")

axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)

#1 alternating noun production

alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & noun_type_training2 == "alternating")
#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ noun_type_test2 + participant_private_id, alternating_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.958
# average accuracy separately for construction 1 (singular) and construction 2 (plural) scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$noun_type_test2, mean),3)
## construction1 construction2 
##         0.976         0.940
a = lizCenter(alternating_prod.df, list("noun_type_test2"))  


# maximally vague priors for the intercept and the predictors
alternating_model <-brm(formula = semantically_correct~noun_type_test2.ct + (1 + noun_type_test2.ct|participant_private_id), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model, variable = c("b_Intercept", "b_noun_type_test2.ct"))
##                        Estimate Est.Error      Q2.5     Q97.5
## b_Intercept           3.0791575 0.4210314  2.323571 3.9693867
## b_noun_type_test2.ct -0.4464033 0.6105136 -1.681100 0.7551256
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_test2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.2274167
# no difference between construction 1 and construction 2  
# Final model

# maximally vague priors for the intercept 
alternating_model_final = brm(formula = semantically_correct~1 + (1|participant_private_id), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(alternating_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 2.973806 0.4062124 2.247031 3.840554
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)

samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel noun production

novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & noun_type_training2 == "novel")
#and filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")

# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ noun_type_test2 + participant_private_id, novel_prod.df, FUN=mean)

# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.94
# average accuracy separately for construction 1 (singular) and construction 2 (plural) scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$noun_type_test2, mean),3)
## construction1 construction2 
##          0.94          0.94
b = lizCenter(novel_prod.df, list("noun_type_test2"))  

# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~noun_type_test2.ct + (1 + noun_type_test2.ct|participant_private_id), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model, variable = c("b_Intercept", "b_noun_type_test2.ct"))
##                         Estimate Est.Error      Q2.5    Q97.5
## b_Intercept           2.88762583 0.4459481  2.074774 3.814085
## b_noun_type_test2.ct -0.04618876 0.5988349 -1.262395 1.146949
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_test2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1     0.000
## 2     0.529
# no difference between construction 1 and construction 2  
# Final model   

# maximally vague priors for the intercept 
novel_model_final <- brm(formula = semantically_correct~1+ (1|participant_private_id), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(novel_model_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 2.812491 0.4270611 2.039879 3.706569
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0

Judgment data

There are no semantically incorrect trials in Experiment 5; these analyses are therefore not possible.

Question 2: Does statistical preemption constrain morphological generalizations in children (judgment data)?

#Figure 27

#no semantically incorrect trials here

#we only want to keep novel

judgments_novel.df <- subset(preemption_judgment.df, noun_type_training2 == "novel")   

#and restricted items

judgment_unattested_constr1.df <- subset(preemption_judgment.df, noun_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(preemption_judgment.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)

aggregated.means = aggregate(response ~ condition + restricted_noun + participant_private_id, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_noun)

yarrr::pirateplot(formula = response ~ restricted + condition,
                  data = aggregated.means,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

judgment_unattested_novel_preemption.df <- subset(preemption_judgment.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, judgment_unattested_novel_preemption.df$restricted_noun, mean),3)
##    no   yes 
## 3.630 3.444
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_noun"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_preemption_model, variable = c("b_Intercept","b_restricted_noun.ct"))
##                       Estimate  Est.Error       Q2.5      Q97.5
## b_Intercept           3.537893 0.08425124  3.3705169 3.70006217
## b_restricted_noun.ct -0.182427 0.11186044 -0.4002652 0.03702959
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.9488333
# BF analyses: we use the difference between attested and novel in Experiment 1 (SD = 0.65, entered below as sdtheory = 0.65/2) as an estimate of the difference we expect here

Bf(0.21, 0.75, uniform = 0, meanoftheory = 0, sdtheory = 0.65/2, tail = 1)
## $LikelihoodTheory
## [1] 0.3147016
## 
## $Likelihoodnull
## [1] 0.003228164
## 
## $BayesFactor
## [1] 97.48626
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.21, 0.75, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.07
print(high_threshold)
## [1] 4

Question 3: Does statistical entrenchment constrain morphological generalizations in children (judgment data)?

#no semantically incorrect trials here

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment.df, noun_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment.df, noun_type_training2 == "construction1" & attested_unattested == "0")

entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

#bind new dataframe
entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_noun <- factor(entrenchment_judgment_unattested_novel.df$restricted_noun , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, entrenchment_judgment_unattested_novel.df$restricted_noun, mean),3)
##   yes    no 
## 4.267 4.242
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(entrenchment_judgment_unattested_novel.df, list("restricted_noun"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))


posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept","b_restricted_noun.ct"))
##                         Estimate Est.Error       Q2.5     Q97.5
## b_Intercept           4.25972249 0.1468246  3.9759917 4.5492463
## b_restricted_noun.ct -0.03145913 0.2135227 -0.4480477 0.3858558
mcmc_plot(judgments_entrenchment_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.0000000
## 2 0.5604167
# this prior is based on the final N in the adult study (the attested vs. unattested difference used as a maximum)
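# (note that the obtained effect, -0.02, runs opposite to the direction H1 predicts;
# with a one-tailed half-normal prior this pushes the BF below 1, as seen below)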
Bf(0.21, -0.02, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 1.337387
## 
## $Likelihoodnull
## [1] 1.891129
## 
## $BayesFactor
## [1] 0.7071894
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.21, -0.02, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)


# find values for which BF is inconclusive 
ev_inconclusive <- subset(data.frame(range_test), BF < 3 & BF > 1/3)
low_threshold <- min(ev_inconclusive$sdtheory)
high_threshold <- max(ev_inconclusive$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 0.54
# find out how many more participants we would need for conclusive evidence for entrenchment (BF > 3)
invisible(Bf_powercalc(0.21, -0.02, uniform=0, meanoftheory=0, sdtheory=0.38/2, tail=1, N=40, min=30, max=400))

#N = 327
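
Bf_powercalc() is also sourced; its output is suppressed with invisible(), and N = 327 above is the value read off its results. A hedged sketch of the logic we assume it uses: keep the obtained estimate fixed, rescale the standard error as if n participants had been tested (the SE shrinks with the square root of n), and recompute the Bayes factor at each n in the searched range.

# Hedged sketch of the assumed Bf_powercalc() logic, NOT the sourced code.
# N = the N of the current study; min:max = the range of sample sizes to project.
bf_powercalc_sketch <- function(sd, obtained, sdtheory, N, min, max) {
  data.frame(N = min:max,
             BF = sapply(min:max, function(n)
               Bf(sd * sqrt(N / n), obtained, uniform = 0, meanoftheory = 0,
                  sdtheory = sdtheory, tail = 1)$BayesFactor))
}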

Question 4: Is the effect of statistical pre-emption larger than entrenchment (judgment data)?

#all are semantically correct trials

#we only want to keep novel

all_judgment_novel.df <- subset(combined_judgment_data.df, noun_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(combined_judgment_data.df, noun_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(combined_judgment_data.df, noun_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_noun <- factor(all_judgment_unattested_novel.df$restricted_noun , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_noun, all_judgment_unattested_novel.df$condition), mean),3)
##     entrenchment preemption
## yes        4.267      2.363
## no         4.242      3.141
round(tapply(all_judgment_unattested_novel.df$response, all_judgment_unattested_novel.df$condition, mean),3)
## entrenchment   preemption 
##        4.254        2.752
#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_noun", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_noun.ct|participant_private_id)+restricted_noun.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_noun.ct","b_condition.ct", "b_restricted_noun.ct:condition.ct"))
##                                     Estimate Est.Error        Q2.5     Q97.5
## b_Intercept                        3.4612980 0.0971128  3.27362309  3.650410
## b_restricted_noun.ct               0.3920230 0.1502058  0.09359305  0.682140
## b_condition.ct                    -1.4371588 0.1956216 -1.81939748 -1.049027
## b_restricted_noun.ct:condition.ct  0.7252682 0.2891767  0.14566399  1.278547
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)
C3=mean(samps[,"b_condition.ct"] > 0)
C4=mean(samps[,"b_restricted_noun.ct:condition.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1       0.000000000
## 2       0.005416667
## 3       0.000000000
## 4       0.007083333
#the predicted effect size from the previous study was roughly 1.0; use it as an estimate of the max effect we expect here
Bf(0.29, 0.73, uniform = 0, meanoftheory = 0, sdtheory = 1.00/2, tail = 1)
## $LikelihoodTheory
## [1] 0.6125222
## 
## $Likelihoodnull
## [1] 0.05788388
## 
## $BayesFactor
## [1] 10.58191
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.29, 0.73, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.15
print(high_threshold)
## [1] 4

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

# Figure 28
judgments_unattested_attested.df <- subset(judgments_unattested_attested.df, restricted_noun == "yes")   


aggregated.means1 = aggregate(response ~ condition + attested_unattested + participant_private_id, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)

aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")


yarrr::pirateplot(formula = response ~  attested  + condition,
                  data = aggregated.means1,
                  main = "",
                  theme=2,
                  point.o = .3,
                  gl.col = 'white',
                  ylab = "Rating",
                  cex.lab = 0.8,
                  cex.axis = 1,
                  cex.names = 0.8,
                  yaxt = "n")

axis(2, at = seq(1, 9, by = 1), las=1)

# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested$response, attested_vs_unattested$attested_unattested, mean),3)
##     0     1 
## 2.363 4.526
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                          Estimate  Est.Error     Q2.5    Q97.5
## b_Intercept              3.465906 0.08489338 3.302325 3.635958
## b_attested_unattested.ct 2.034005 0.24222250 1.552625 2.500652
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1         0
## 2         0
# prior from previous study with adults: 2.55 as a max
Bf(0.24, 2.04, uniform = 0, meanoftheory = 0, sdtheory = 2.55/2 , tail = 1)
## $LikelihoodTheory
## [1] 0.1786466
## 
## $Likelihoodnull
## [1] 3.402598e-16
## 
## $BayesFactor
## [1] 5.250301e+14
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.24, 2.04, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.04
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested_ent$response, attested_vs_unattested_ent$attested_unattested, mean),3)
##     0     1 
## 4.267 4.717
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
##                           Estimate Est.Error       Q2.5     Q97.5
## b_Intercept              4.5031623 0.1255597 4.25506558 4.7501735
## b_attested_unattested.ct 0.4325098 0.1688194 0.09821768 0.7688181
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 


pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1    0.0000
## 2    0.0055
# expect a difference of 0.38 from previous work
Bf(0.17, 0.43, uniform = 0, meanoftheory = 0, sdtheory = 0.346/2, tail = 1)
## $LikelihoodTheory
## [1] 0.6592187
## 
## $Likelihoodnull
## [1] 0.0957568
## 
## $BayesFactor
## [1] 6.884301
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 0.43, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.09
print(high_threshold)
## [1] 2.72

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_noun == "yes")

round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
##                  0     1
## entrenchment 4.267 4.717
## preemption   2.363 4.526
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept", "b_attested_unattested.ct","b_condition.ct", "b_attested_unattested.ct:condition.ct"))
##                                         Estimate  Est.Error       Q2.5
## b_Intercept                            3.9475537 0.07354916  3.8008239
## b_attested_unattested.ct               1.3234928 0.14895220  1.0344859
## b_condition.ct                        -0.9939045 0.14423157 -1.2714740
## b_attested_unattested.ct:condition.ct  1.5519808 0.28477743  0.9788585
##                                            Q97.5
## b_Intercept                            4.0899080
## b_attested_unattested.ct               1.6128755
## b_condition.ct                        -0.7071331
## b_attested_unattested.ct:condition.ct  2.1026306
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_condition.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1                 0
## 2                 0
## 3                 0
## 4                 0
#max predicted effect size from previous study 2.11
Bf(0.29, 1.56, uniform = 0, meanoftheory = 0, sdtheory = 2.12/2, tail = 1)
## $LikelihoodTheory
## [1] 0.2650902
## 
## $Likelihoodnull
## [1] 7.160226e-07
## 
## $BayesFactor
## [1] 370225.9
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.29, 1.56, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

# Figure 29

# filter out missing data
data_long <- subset(preemption_production.df, experimenter == "GS")

data_long <- gather(data_long, det_type, produced, det1:none, factor_key=TRUE)
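
gather() still works here but is superseded in current tidyr; for reference, an equivalent reshape with pivot_longer() (a sketch, shown commented out so the chunk's behaviour is unchanged):

# data_long <- pivot_longer(data_long, det1:none,
#                           names_to = "det_type", values_to = "produced")
# data_long$det_type <- factor(data_long$det_type,
#                              levels = unique(data_long$det_type))  # keep column order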

p = ggplot(data_long, aes(x = noun_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw()+
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("plural 1", "plural 2", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "plural 1", "construction2" = "plural 2", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("noun type at training")
p

#Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (the model intercept) for the restricted nouns against chance

production_preemption_attested_unattested.df <- subset(preemption_production.df, experimenter == "GS")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_noun =="yes")

round(tapply(production_preemption_attested_unattested.df$attested_unattested, production_preemption_attested_unattested.df$noun_type_training2, mean),3)
## construction1 construction2 
##         0.940         0.944
production_preemption_attested_unattested.df$noun_type_training2 <- factor(production_preemption_attested_unattested.df$noun_type_training2)

df_prod = lizCenter(production_preemption_attested_unattested.df , list("noun_type_training2"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~noun_type_training2.ct + (1 + noun_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_noun_type_training2.ct"))
##                           Estimate Est.Error       Q2.5    Q97.5
## b_Intercept              3.1598693 0.9611507  0.8657655 4.740990
## b_noun_type_training2.ct 0.1546601 0.5936991 -1.0041021 1.324692
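
The model is on the logit scale, so chance-level production of the attested det corresponds to an intercept of 0; back-transforming the posterior mean printed above makes the scale concrete:

plogis(3.16)   # ~0.96, the estimated proportion of attested dets
plogis(0)      # 0.5, the chance level the intercept is compared against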
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_training2.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##     c(C1, C2)
## 1 0.003916667
## 2 0.399250000
#same analysis without noun_type_training2

# maximally vague priors for the intercept
prod_attested_unattested_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept"))
##             Estimate Est.Error     Q2.5    Q97.5
## b_Intercept 3.262778 0.8946009 1.124057 4.695502
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.004083333
# We now compare unattested production for restricted vs. novel nouns
# Do participants produce the unwitnessed form less for the restricted nouns than for the novel noun?


production_preemption_restricted_novel.df <- subset(preemption_production.df, experimenter == "GS")
production_preemption_restricted_novel.df <- subset(production_preemption_restricted_novel.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df<- subset(production_preemption_restricted_novel.df, noun_type_training2 != "alternating")

# all forms are unwitnessed for the novel noun, so we arbitrarily code all det1 responses as attested and all det2 responses as unattested

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)

round(tapply(production_preemption_restricted_novel.df$attested_unattested, production_preemption_restricted_novel.df$noun_type_training2, mean),3)
## construction1 construction2         novel 
##         0.940         0.944         0.463
round(tapply(production_preemption_restricted_novel.df$attested_unattested , production_preemption_restricted_novel.df$restricted_noun, mean),3)
##    no   yes 
## 0.463 0.942
production_preemption_restricted_novel.df$restricted_noun <- factor(production_preemption_restricted_novel.df$restricted_noun)
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_noun"))

# maximally vague priors for the predictors and the intercept
prod_unattested_novel_final = brm(formula = attested_unattested ~restricted_noun.ct + (1 + restricted_noun.ct|participant_private_id), data=production_preemption_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_noun.ct"))
##                      Estimate Est.Error        Q2.5    Q97.5
## b_Intercept          2.394837 0.5213305  1.23559222 3.323894
## b_restricted_noun.ct 1.874355 0.9304485 -0.05485393 3.582040
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##      c(C1, C2)
## 1 0.0008333333
## 2 0.0278333333

Production data: Effect of statistical entrenchment

# Figure 30

# filter out missing data
data_long_e <- subset(entrenchment_production.df, experimenter == "GS")

data_long_e <- gather(data_long_e, det_type, produced, det1:none, factor_key=TRUE)

data_long_e$noun_type_test2 <-recode(data_long_e$noun_type_test2, "construction1" = "test: singular","construction2" = "test: plural")

p = ggplot(data_long_e, aes(x = noun_type_training2, y = produced, fill = det_type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(panel.grid.major = element_blank()) +
  facet_grid("noun_type_test2") +
  theme(panel.grid.minor = element_blank()) +
  scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("singular", "plural", "other", "none")) +
  scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "singular", "construction2" = "plural", "novel" = "novel")) +
  ylab("proportion produced") +
  xlab("noun type at training")
p

#a. Are participants producing more attested than unattested dets?
# here, we want to see how often participants produce the attested vs. unattested det when a restricted noun is tested in its unwitnessed construction:
# construction-1 (singular-only) nouns in the plural condition, and construction-2 (plural-only) nouns in the singular condition (see the two subsets below)
production_entrenchment_attested_unattested.df <- subset(entrenchment_production.df, experimenter == "GS")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_noun =="yes")

#We want to compare attested vs. unattested trials for construction-1 (singular-only) nouns in the plural (construction 2) condition at test
production_entrenchment_attested_unattested1.df  <- subset(production_entrenchment_attested_unattested.df, noun_type_training2 == "construction1" & noun_type_test2 == "construction2")

#And construction-2 (plural-only) nouns in the singular (construction 1) condition at test. Filter out irrelevant trials
production_entrenchment_attested_unattested2.df  <- subset(production_entrenchment_attested_unattested.df, noun_type_training2 == "construction2" & noun_type_test2 == "construction1")

production_entrenchment_attested_unattested.df <- rbind(production_entrenchment_attested_unattested1.df, production_entrenchment_attested_unattested2.df)

#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.167
# and separately for each noun type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$noun_type_training2, mean),3)
## construction1 construction2 
##         0.167         0.167
production_entrenchment_attested_unattested.df$noun_type_training2 <- factor(production_entrenchment_attested_unattested.df$noun_type_training2)
df_prod_ent = lizCenter(production_entrenchment_attested_unattested.df, list("noun_type_training2"))


# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~noun_type_training2.ct + (1 + noun_type_training2.ct|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept","b_noun_type_training2.ct"))
##                             Estimate Est.Error       Q2.5     Q97.5
## b_Intercept              -1.89807063 0.4223627 -2.7690007 -1.093261
## b_noun_type_training2.ct  0.06586695 0.4794867 -0.8541068  1.033883
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_noun_type_training2.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1 0.9999167
## 2 0.4501667
#same analysis without noun_type_training2


# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|participant_private_id), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept"))
##              Estimate Est.Error   Q2.5     Q97.5
## b_Intercept -1.871587 0.4215229 -2.749 -1.074514
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.9999167
# c. We now compare unattested production for restricted vs. novel nouns
# Do participants produce the unwitnessed form less for the 2 non-alternating nouns than for the novel noun? (the "unwitnessed" form has to be set arbitrarily for the novel noun)

production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, experimenter == "GS")
production_entrenchment_restricted_novel.df <- subset(production_entrenchment_restricted_novel.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, noun_type_training2 != "alternating")

# all forms are unwitnessed for the novel noun, so we arbitrarily code all det1 responses as attested and all det2 responses as unattested

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)

# select trials featuring the novel noun in the plural (construction 2) condition at test
production_entrenchment_restricted_novel1.df <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "novel"  & noun_type_test2 == "construction2")


# Select trials featuring construction-1 (singular-only) nouns in the plural (construction 2) condition at test
production_entrenchment_restricted_novel2.df  <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "construction1" & noun_type_test2 == "construction2")

# Select trials featuring construction-2 (plural-only) nouns in the singular (construction 1) condition at test
production_entrenchment_restricted_novel3.df  <- subset(production_entrenchment_restricted_novel.df, noun_type_training2 == "construction2" & noun_type_test2 == "construction1")


production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$noun_type_training2, mean),3)
## construction1 construction2         novel 
##         0.167         0.167         0.060
# reverse-code so that 1 = unattested: we focus on unattested rather than attested production for novel vs. restricted
production_entrenchment_restricted_novel.df$attested_unattested <- recode(production_entrenchment_restricted_novel.df$attested_unattested, `1` = 0L, `0` = 1L)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_noun, mean),3)
##    no   yes 
## 0.940 0.833
#i.e. participants produce *unattested* forms less often for the restricted nouns than for the novel noun

production_entrenchment_restricted_novel.df$restricted_noun <- factor(production_entrenchment_restricted_novel.df$restricted_noun)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_noun"))


# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_noun.ct + (1 + restricted_noun.ct|participant_private_id), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_noun.ct"))
##                        Estimate Est.Error      Q2.5       Q97.5
## b_Intercept           2.2503753 0.4107460  1.467360  3.09134109
## b_restricted_noun.ct -0.9998556 0.5189201 -2.024155 -0.01250217
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_noun.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1   0.00000
## 2   0.02375

Analyses across experiments

#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("all_production_data.csv")

combined_judgment_data.df <- read.csv("all_judgment_data.csv")
combined_judgment_data.df$restricted_verb_noun <- factor(combined_judgment_data.df$restricted_verb_noun)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
combined_judgment_data.df$experiment  <- factor(combined_judgment_data.df$experiment)

#separately for entrenchment and preemption

#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$scene_test2 <- factor(entrenchment_production.df$scene_test2)
entrenchment_production.df$experiment  <- factor(entrenchment_production.df$experiment)
entrenchment_production.df$verb_noun_type_training2 <- factor(entrenchment_production.df$verb_noun_type_training2)
entrenchment_production.df$restricted_verb_noun <- factor(entrenchment_production.df$restricted_verb_noun)

entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$restricted_verb_noun <- factor(entrenchment_judgment.df$restricted_verb_noun)
entrenchment_judgment.df$scene_test2 <- factor(entrenchment_judgment.df$scene_test2)


#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct)
preemption_production.df$scene_test2 <- factor(preemption_production.df$scene_test2)
preemption_production.df$experiment  <- factor(preemption_production.df$experiment)
preemption_production.df$verb_noun_type_training2 <- factor(preemption_production.df$verb_noun_type_training2)
preemption_production.df$restricted_verb_noun <- factor(preemption_production.df$restricted_verb_noun)

preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$restricted_verb_noun <- factor(preemption_judgment.df$restricted_verb_noun)
preemption_judgment.df$scene_test2 <- factor(preemption_judgment.df$scene_test2)

Preregistered data analyses

Question 2: Does statistical preemption constrain morphological generalizations (judgment data)?

#first, filter out semantically incorrect trials

judgments_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")   

#we only want to keep novel

judgments_novel.df <- subset(judgments_unattested_novel.df, verb_noun_type_training2 == "novel")   

#and restricted items

judgment_unattested_constr1.df <- subset(judgments_unattested_novel.df, verb_noun_type_training2 == "construction1" & attested_unattested == "0")   
judgment_unattested_constr2.df <- subset(judgments_unattested_novel.df, verb_noun_type_training2 == "construction2" & attested_unattested == "0")   

judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)
judgment_unattested_novel.df$restricted_verb_noun <- factor(judgment_unattested_novel.df$restricted_verb_noun, levels = c("yes", "no"))

judgment_unattested_novel_preemption.df <- subset(judgment_unattested_novel.df, condition == "preemption")   
round(tapply(judgment_unattested_novel_preemption.df$response, list(judgment_unattested_novel_preemption.df$restricted_verb_noun, judgment_unattested_novel_preemption.df$experiment), mean),3)
##      exp1  exp2  exp3  exp4  exp5
## yes 2.362 2.259 2.423 2.717 2.363
## no  3.026 2.644 2.722 3.475 3.141
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel_preemption.df, list("restricted_verb_noun","experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_preemption_model <- brm(formula = response~(1 +restricted_verb_noun.ct|participant_private_id)+restricted_verb_noun.ct*experiment.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_preemption_model, variable = c("b_Intercept","b_restricted_verb_noun.ct", "b_experiment.ct", "b_restricted_verb_noun.ct:experiment.ct"))
##                                           Estimate  Est.Error         Q2.5
## b_Intercept                             2.69305582 0.04806365  2.600696060
## b_restricted_verb_noun.ct               0.55633205 0.08551984  0.389007014
## b_experiment.ct                         0.07434554 0.03457884  0.006191748
## b_restricted_verb_noun.ct:experiment.ct 0.06161319 0.06116713 -0.057903936
##                                             Q97.5
## b_Intercept                             2.7879767
## b_restricted_verb_noun.ct               0.7222466
## b_experiment.ct                         0.1424988
## b_restricted_verb_noun.ct:experiment.ct 0.1825595
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb_noun.ct"] < 0)
C3=mean(samps[,"b_experiment.ct"] < 0)
C4=mean(samps[,"b_restricted_verb_noun.ct:experiment.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1        0.00000000
## 2        0.00000000
## 3        0.01516667
## 4        0.15583333
# BF analyses: we use the difference between attested and novel in Experiment 1 (SD = 0.65) as an estimate of the difference we expect here

Bf(0.08, 0.55, uniform = 0, meanoftheory = 0, sdtheory = 0.65/2, tail = 1)
## $LikelihoodTheory
## [1] 0.6179055
## 
## $Likelihoodnull
## [1] 2.717816e-10
## 
## $BayesFactor
## [1] 2273536961
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.08, 0.55, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4

Question 3: Does statistical entrenchment constrain morphological generalizations in children (judgment data)?

#no semantically incorrect trials here

#we only want to keep novel

entrenchment_judgment_novel.df <- subset(entrenchment_judgment.df, verb_noun_type_training2 == "novel")   

#and restricted items

entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment.df, verb_noun_type_training2 == "construction1" & attested_unattested == "0")

entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment.df, verb_noun_type_training2 == "construction2" & attested_unattested == "0")   

#bind new dataframe
entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_verb_noun <- factor(entrenchment_judgment_unattested_novel.df$restricted_verb_noun , levels = c("yes", "no"))

round(tapply(entrenchment_judgment_unattested_novel.df$response, list(entrenchment_judgment_unattested_novel.df$restricted_verb_noun, entrenchment_judgment_unattested_novel.df$experiment), mean),3)
##      exp1  exp2  exp3  exp4  exp5
## yes 3.532 3.331 3.397 4.537 4.267
## no  3.212 3.053 2.997 4.646 4.242
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(entrenchment_judgment_unattested_novel.df, list("restricted_verb_noun","experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here) 
judgments_entrenchment_model <- brm(formula = response~(1 +restricted_verb_noun.ct|participant_private_id)+restricted_verb_noun.ct*experiment.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept","b_restricted_verb_noun.ct", "b_experiment.ct", "b_restricted_verb_noun.ct:experiment.ct"))
##                                            Estimate  Est.Error         Q2.5
## b_Intercept                              3.65902955 0.05059905  3.560145793
## b_restricted_verb_noun.ct               -0.20199043 0.06564068 -0.332224072
## b_experiment.ct                          0.31000379 0.03562617  0.239751632
## b_restricted_verb_noun.ct:experiment.ct  0.09551564 0.04550845  0.005478458
##                                               Q97.5
## b_Intercept                              3.75838841
## b_restricted_verb_noun.ct               -0.07440259
## b_experiment.ct                          0.38046527
## b_restricted_verb_noun.ct:experiment.ct  0.18419928
mcmc_plot(judgments_entrenchment_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb_noun.ct"] < 0)
C3=mean(samps[,"b_experiment.ct"] < 0)
C4=mean(samps[,"b_restricted_verb_noun.ct:experiment.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1        0.00000000
## 2        0.99900000
## 3        0.00000000
## 4        0.01883333
# this one is based on the final N in the adult study (**attested vs. unattested** used as a max)
Bf(0.07, -0.20, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 0.009072943
## 
## $Likelihoodnull
## [1] 0.09620142
## 
## $BayesFactor
## [1] 0.09431195
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.07,-0.20, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)


# find values for which the BF does not provide evidence for H1 (BF < 3)
no_ev_for_h1 <- subset(data.frame(range_test), BF < 3)
low_threshold <- min(no_ev_for_h1$sdtheory)
high_threshold <- max(no_ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0
print(high_threshold)
## [1] 4

Question 4: Is the effect of statistical pre-emption larger than entrenchment (judgment data)?

#all are semantically correct trials

#we only want to keep novel

all_judgment_novel.df <- subset(combined_judgment_data.df, verb_noun_type_training2 == "novel")   

#and restricted items

all_judgment_unattested_constr1.df <- subset(combined_judgment_data.df, verb_noun_type_training2 == "construction1" & attested_unattested == "0")   
all_judgment_unattested_constr2.df <- subset(combined_judgment_data.df, verb_noun_type_training2 == "construction2" & attested_unattested == "0")   

all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
all_judgment_unattested_novel.df$restricted_verb_noun <- factor(all_judgment_unattested_novel.df$restricted_verb_noun , levels = c("yes", "no"))

round(tapply(all_judgment_unattested_novel.df$response, list(all_judgment_unattested_novel.df$restricted_verb_noun, all_judgment_unattested_novel.df$condition, all_judgment_unattested_novel.df$experiment), mean),3)
## , , exp1
## 
##     entrenchment preemption
## yes        3.532      2.362
## no         3.212      3.026
## 
## , , exp2
## 
##     entrenchment preemption
## yes        3.331      2.259
## no         3.053      2.644
## 
## , , exp3
## 
##     entrenchment preemption
## yes        3.397      2.423
## no         2.997      2.722
## 
## , , exp4
## 
##     entrenchment preemption
## yes        4.537      2.717
## no         4.646      3.475
## 
## , , exp5
## 
##     entrenchment preemption
## yes        4.267      2.363
## no         4.242      3.141
#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_verb_noun", "condition","experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = response~(1 +restricted_verb_noun.ct|participant_private_id)+restricted_verb_noun.ct * condition.ct * experiment.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_verb_noun.ct","b_condition.ct", "b_experiment.ct",
                                                           "b_restricted_verb_noun.ct:condition.ct","b_restricted_verb_noun.ct:experiment.ct", "b_condition.ct:experiment.ct", "b_restricted_verb_noun.ct:condition.ct:experiment.ct"))
##                                                         Estimate  Est.Error
## b_Intercept                                           3.17155348 0.03491197
## b_restricted_verb_noun.ct                             0.18454154 0.05421902
## b_condition.ct                                       -0.97750475 0.06926658
## b_experiment.ct                                       0.18892770 0.02441428
## b_restricted_verb_noun.ct:condition.ct                0.74641625 0.10829825
## b_restricted_verb_noun.ct:experiment.ct               0.07808064 0.03809621
## b_condition.ct:experiment.ct                         -0.23689949 0.04910272
## b_restricted_verb_noun.ct:condition.ct:experiment.ct -0.03452506 0.07682964
##                                                              Q2.5      Q97.5
## b_Intercept                                           3.103527852  3.2397261
## b_restricted_verb_noun.ct                             0.078572981  0.2894351
## b_condition.ct                                       -1.111916956 -0.8420723
## b_experiment.ct                                       0.141557341  0.2367926
## b_restricted_verb_noun.ct:condition.ct                0.533406757  0.9555333
## b_restricted_verb_noun.ct:experiment.ct               0.003558855  0.1529347
## b_condition.ct:experiment.ct                         -0.334749087 -0.1410888
## b_restricted_verb_noun.ct:condition.ct:experiment.ct -0.184841732  0.1154114
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb_noun.ct"] < 0)
C3=mean(samps[,"b_condition.ct"] > 0)
C4=mean(samps[,"b_experiment.ct"] < 0)
C5=mean(samps[,"b_restricted_verb_noun.ct:condition.ct"] < 0) 
C6=mean(samps[,"b_restricted_verb_noun.ct:experiment.ct"] < 0) 
C7=mean(samps[,"b_condition.ct:experiment.ct"] > 0) 
C8=mean(samps[,"b_restricted_verb_noun.ct:condition.ct:experiment.ct"] > 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4,C5,C6,C7,C8))
pMCMC
##   c(C1, C2, C3, C4, C5, C6, C7, C8)
## 1                      0.0000000000
## 2                      0.0003333333
## 3                      0.0000000000
## 4                      0.0000000000
## 5                      0.0000000000
## 6                      0.0186666667
## 7                      0.0000000000
## 8                      0.3262500000
#the predicted effect size from the previous study was roughly 1.0; use it as an estimate of the max effect we expect here
Bf(0.11, 0.74, uniform = 0, meanoftheory = 0, sdtheory = 1.00/2, tail = 1)
## $LikelihoodTheory
## [1] 0.548309
## 
## $Likelihoodnull
## [1] 5.398322e-10
## 
## $BayesFactor
## [1] 1015702750
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.11, 0.74, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4

Exploratory data analyses

Effect of statistical pre-emption: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested = subset(preemption_judgment.df, restricted_verb_noun == "yes")

round(tapply(attested_vs_unattested$response, list(attested_vs_unattested$attested_unattested, attested_vs_unattested$experiment), mean),3)
##    exp1  exp2  exp3  exp4  exp5
## 0 2.362 2.259 2.423 2.717 2.363
## 1 4.952 4.881 4.908 4.375 4.526
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested", "experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct*experiment.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct", "b_experiment.ct",  "b_attested_unattested.ct:experiment.ct"))
##                                           Estimate  Est.Error        Q2.5
## b_Intercept                             3.59017010 0.03001203  3.53237996
## b_attested_unattested.ct                2.31555317 0.08953190  2.14093928
## b_experiment.ct                        -0.04667132 0.02191017 -0.08988648
## b_attested_unattested.ct:experiment.ct -0.17390205 0.06530117 -0.30067129
##                                               Q97.5
## b_Intercept                             3.648190396
## b_attested_unattested.ct                2.492696623
## b_experiment.ct                        -0.004229704
## b_attested_unattested.ct:experiment.ct -0.046482291
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_preemption))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_experiment.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:experiment.ct"] > 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1       0.000000000
## 2       0.000000000
## 3       0.015500000
## 4       0.003916667
# prior from previous study with adults: 2.55 as a max
Bf(0.09, 2.32, uniform = 0, meanoftheory = 0, sdtheory = 2.55/2 , tail = 1)
## $LikelihoodTheory
## [1] 0.1202117
## 
## $Likelihoodnull
## [1] 2.257701e-144
## 
## $BayesFactor
## [1] 5.324518e+142
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.09, 2.32, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Effect of statistical entrenchment: Comparison of adults’ judgment ratings (acceptability) for witnessed versus unwitnessed forms

attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_verb_noun == "yes")

round(tapply(attested_vs_unattested_ent$response, list(attested_vs_unattested_ent$attested_unattested, attested_vs_unattested_ent$experiment), mean),3)
##    exp1  exp2  exp3  exp4  exp5
## 0 3.532 3.331 3.397 4.537 4.267
## 1 3.794 3.688 3.725 4.850 4.717
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested","experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment <- brm(formula =response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct*experiment.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct","b_experiment.ct", "b_attested_unattested.ct:experiment.ct"))
##                                          Estimate  Est.Error        Q2.5
## b_Intercept                            3.92566647 0.04221172  3.84299153
## b_attested_unattested.ct               0.32730795 0.05321024  0.22122748
## b_experiment.ct                        0.27711070 0.02977388  0.21901534
## b_attested_unattested.ct:experiment.ct 0.02896594 0.03822509 -0.04526074
##                                            Q97.5
## b_Intercept                            4.0086490
## b_attested_unattested.ct               0.4308097
## b_experiment.ct                        0.3353219
## b_attested_unattested.ct:experiment.ct 0.1036774
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment))

C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0) 
C3=mean(samps[,"b_experiment.ct"] > 0) 
C4=mean(samps[,"b_attested_unattested.ct:experiment.ct"] > 0) 

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.0000000
## 3         1.0000000
## 4         0.7758333
# expect a difference of 0.38 from previous work
Bf(0.05, 0.33, uniform = 0, meanoftheory = 0, sdtheory = 0.346/2, tail = 1)
## $LikelihoodTheory
## [1] 0.8265516
## 
## $Likelihoodnull
## [1] 2.77336e-09
## 
## $BayesFactor
## [1] 298032561
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.05, 0.33, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Entrenchment vs. preemption: ratings for witnessed vs. unwitnessed forms

attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_verb_noun == "yes")


round(tapply(attested_vs_unattested_across$response, list(attested_vs_unattested_across$attested_unattested, attested_vs_unattested_across$condition, attested_vs_unattested_across$experiment), mean),3)
## , , exp1
## 
##   entrenchment preemption
## 0        3.532      2.362
## 1        3.794      4.952
## 
## , , exp2
## 
##   entrenchment preemption
## 0        3.331      2.259
## 1        3.688      4.881
## 
## , , exp3
## 
##   entrenchment preemption
## 0        3.397      2.423
## 1        3.725      4.908
## 
## , , exp4
## 
##   entrenchment preemption
## 0        4.537      2.717
## 1        4.850      4.375
## 
## , , exp5
## 
##   entrenchment preemption
## 0        4.267      2.363
## 1        4.717      4.526
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition","experiment"))

# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = response~(1 +attested_unattested.ct|participant_private_id)+attested_unattested.ct * condition.ct * experiment.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept", "b_attested_unattested.ct","b_condition.ct", "b_experiment.ct",
                                                           "b_attested_unattested.ct:condition.ct","b_attested_unattested.ct:experiment.ct", "b_condition.ct:experiment.ct", "b_attested_unattested.ct:condition.ct:experiment.ct"))
##                                                        Estimate  Est.Error
## b_Intercept                                          3.75983796 0.02557333
## b_attested_unattested.ct                             1.35936736 0.05136140
## b_condition.ct                                      -0.34535147 0.05122354
## b_experiment.ct                                      0.11168734 0.01827988
## b_attested_unattested.ct:condition.ct                1.99592109 0.10122389
## b_attested_unattested.ct:experiment.ct              -0.07401527 0.03691497
## b_condition.ct:experiment.ct                        -0.32220303 0.03632766
## b_attested_unattested.ct:condition.ct:experiment.ct -0.20543565 0.07307795
##                                                           Q2.5        Q97.5
## b_Intercept                                          3.7098392  3.809888481
## b_attested_unattested.ct                             1.2606250  1.461492028
## b_condition.ct                                      -0.4447886 -0.245876108
## b_experiment.ct                                      0.0756648  0.147337684
## b_attested_unattested.ct:condition.ct                1.7978287  2.193211692
## b_attested_unattested.ct:experiment.ct              -0.1465267 -0.001701166
## b_condition.ct:experiment.ct                        -0.3930293 -0.250686012
## b_attested_unattested.ct:condition.ct:experiment.ct -0.3495179 -0.062657392
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0)
C3=mean(samps[,"b_condition.ct"] > 0)
C4=mean(samps[,"b_experiment.ct"] < 0)
C5=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0) 
C6=mean(samps[,"b_attested_unattested.ct:experiment.ct"] < 0) 
C7=mean(samps[,"b_condition.ct:experiment.ct"] > 0) 
C8=mean(samps[,"b_attested_unattested.ct:condition.ct:experiment.ct"] > 0) 


pMCMC=as.data.frame(c(C1,C2,C3,C4,C5,C6,C7,C8))
pMCMC
##   c(C1, C2, C3, C4, C5, C6, C7, C8)
## 1                       0.000000000
## 2                       0.000000000
## 3                       0.000000000
## 4                       0.000000000
## 5                       0.000000000
## 6                       0.977833333
## 7                       0.000000000
## 8                       0.002666667
#max predicted effect size from previous study 2.11
Bf(0.10, 2.00, uniform = 0, meanoftheory = 0, sdtheory = 2.12/2, tail = 1)
## $LikelihoodTheory
## [1] 0.1283773
## 
## $Likelihoodnull
## [1] 5.520948e-87
## 
## $BayesFactor
## [1] 2.325277e+85
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.10, 2.00, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)

# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4

Production data: Effect of statistical pre-emption

#Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (the model intercept) for the restricted items against chance

production_preemption_attested_unattested.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_attested_unattested.df <- subset(production_preemption_attested_unattested.df, restricted_verb_noun =="yes")


round(tapply(production_preemption_attested_unattested.df$attested_unattested, list(production_preemption_attested_unattested.df$verb_noun_type_training2, production_preemption_attested_unattested.df$experiment), mean),3)
##                exp1  exp2  exp3  exp4  exp5
## alternating      NA    NA    NA    NA    NA
## construction1 0.994 0.997 0.992 0.915 0.940
## construction2 0.994 0.990 0.997 0.823 0.944
## novel            NA    NA    NA    NA    NA
df_prod = lizCenter(production_preemption_attested_unattested.df , list("verb_noun_type_training2","experiment"))  

# maximally vague priors for the predictors and the intercept
prod_attested_unattested = brm(formula = attested_unattested ~verb_noun_type_training2.ct * experiment.ct + (1 + verb_noun_type_training2.ct|participant_private_id), data=df_prod, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))

posterior_summary(prod_attested_unattested, variable = c("b_Intercept","b_verb_noun_type_training2.ct","b_experiment.ct","b_verb_noun_type_training2.ct:experiment.ct"))
##                                               Estimate Est.Error       Q2.5
## b_Intercept                                  6.2526678 0.4313391  5.4526789
## b_verb_noun_type_training2.ct                0.1636860 0.5874071 -0.9508451
## b_experiment.ct                             -0.8206444 0.2534354 -1.3238520
## b_verb_noun_type_training2.ct:experiment.ct -0.1315478 0.2779575 -0.6730400
##                                                  Q97.5
## b_Intercept                                  7.1407356
## b_verb_noun_type_training2.ct                1.3451253
## b_experiment.ct                             -0.3399602
## b_verb_noun_type_training2.ct:experiment.ct  0.4232833
mcmc_plot(prod_attested_unattested, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_noun_type_training2.ct"] < 0)
C3=mean(samps[,"b_experiment.ct"] > 0)
C4=mean(samps[,"b_verb_noun_type_training2.ct:experiment.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.3949167
## 3         0.0005000
## 4         0.3087500
# The same analysis without the verb_noun_type_training2 predictor

# weakly informative normal(0, 1) prior for the intercept
prod_attested_unattested_final = brm(
  formula = attested_unattested ~ experiment.ct + (1 | participant_private_id),
  data = df_prod,
  family = bernoulli(link = logit),
  prior = set_prior("normal(0, 1)", class = "Intercept"),
  cores = 4, warmup = 2000, iter = 5000, chains = 4,
  control = list(adapt_delta = 0.99)
)

posterior_summary(prod_attested_unattested_final, variable = c("b_Intercept","b_experiment.ct"))
##                   Estimate Est.Error      Q2.5      Q97.5
## b_Intercept      6.1007654 0.4200074  5.354257  6.9802577
## b_experiment.ct -0.8678397 0.2620767 -1.389178 -0.3681717
mcmc_plot(prod_attested_unattested_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_experiment.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1     0e+00
## 2     5e-04
# We now compare unattested productions for restricted vs. novel verbs:
# do participants produce the unwitnessed form less for the restricted verbs than for the novel verb?

production_preemption_restricted_novel.df <- subset(preemption_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_preemption_restricted_novel.df <- subset(production_preemption_restricted_novel.df, verb_noun_type_training2 != "alternating")

# All forms are unwitnessed for the novel verb, so we arbitrarily code all det1 responses as attested (1) and all det2 responses as unattested (0)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_preemption_restricted_novel.df$attested_unattested)

production_preemption_restricted_novel.df$attested_unattested <- ifelse(production_preemption_restricted_novel.df$verb_noun_type_training2 == "novel" & production_preemption_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_preemption_restricted_novel.df$attested_unattested)
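The two ifelse() calls above can equivalently be written as a single dplyr::case_when() (a sketch, not the original code):

production_preemption_restricted_novel.df <- production_preemption_restricted_novel.df %>%
  mutate(attested_unattested = case_when(
    verb_noun_type_training2 == "novel" & det_lenient_adapted == "det_construction1" ~ 1,
    verb_noun_type_training2 == "novel" & det_lenient_adapted == "det_construction2" ~ 0,
    TRUE ~ as.numeric(attested_unattested)
  ))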

round(tapply(production_preemption_restricted_novel.df$attested_unattested, list(production_preemption_restricted_novel.df$verb_noun_type_training2, production_preemption_restricted_novel.df$experiment), mean),3)
##                exp1  exp2  exp3  exp4  exp5
## alternating      NA    NA    NA    NA    NA
## construction1 0.994 0.997 0.992 0.915 0.940
## construction2 0.994 0.990 0.997 0.823 0.944
## novel         0.470 0.478 0.565 0.497 0.463
production_preemption_restricted_novel1.df = lizCenter(production_preemption_restricted_novel.df, list("restricted_verb_noun","experiment"))

# weakly informative normal(0, 1) priors for the predictors and the intercept
prod_unattested_novel_final = brm(
  formula = attested_unattested ~ restricted_verb_noun.ct * experiment.ct +
    (1 + restricted_verb_noun.ct | participant_private_id),
  data = production_preemption_restricted_novel1.df,
  family = bernoulli(link = logit),
  prior = c(prior(normal(0, 1), class = Intercept),
            prior(normal(0, 1), class = b)),
  cores = 4, warmup = 2000, iter = 5000, chains = 4,
  control = list(adapt_delta = 0.99)
)

posterior_summary(prod_unattested_novel_final, variable = c("b_Intercept","b_restricted_verb_noun.ct","b_experiment.ct","b_restricted_verb_noun.ct:experiment.ct"))
##                                           Estimate Est.Error       Q2.5
## b_Intercept                              3.9292693 0.2505454  3.4552749
## b_restricted_verb_noun.ct                5.3586636 0.4073001  4.5856597
## b_experiment.ct                         -0.4931289 0.1606078 -0.8181158
## b_restricted_verb_noun.ct:experiment.ct -0.7313033 0.2779011 -1.2841155
##                                              Q97.5
## b_Intercept                              4.4406941
## b_restricted_verb_noun.ct                6.1808788
## b_experiment.ct                         -0.1859818
## b_restricted_verb_noun.ct:experiment.ct -0.1904511
mcmc_plot(prod_unattested_novel_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb_noun.ct"] < 0)
C3=mean(samps[,"b_experiment.ct"] > 0)
C4=mean(samps[,"b_restricted_verb_noun.ct:experiment.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1      0.0000000000
## 2      0.0000000000
## 3      0.0006666667
## 4      0.0043333333

Production data: Effect of statistical entrenchment

# a. Are participants producing more attested than unattested determiners?
# Here we want to see how often participants produce the unattested determiner, e.g. the transitive-only det1
# for a det2 (intransitive-only) noun in the intransitive condition at test, and vice versa.


production_entrenchment_attested_unattested.df  <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df  <- subset(production_entrenchment_attested_unattested.df, restricted_verb_noun =="yes")

# Select trials with transitive nouns in the intransitive inchoative construction at test
production_entrenchment_attested_unattested1.df <- subset(production_entrenchment_attested_unattested.df, verb_noun_type_training2 == "construction1" & scene_test2 == "construction2")

# ... and trials with intransitive inchoative nouns in the transitive construction at test; all other trials are dropped
production_entrenchment_attested_unattested2.df <- subset(production_entrenchment_attested_unattested.df, verb_noun_type_training2 == "construction2" & scene_test2 == "construction1")

production_entrenchment_attested_unattested.df <- rbind(production_entrenchment_attested_unattested1.df, production_entrenchment_attested_unattested2.df)
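The two subsets plus rbind() could also be written as one subset() with a compound condition; the result is the same up to row order (an alternative sketch, not a further step to run):

# alternative to the two subsets above
production_entrenchment_attested_unattested.df <- subset(
  production_entrenchment_attested_unattested.df,
  (verb_noun_type_training2 == "construction1" & scene_test2 == "construction2") |
    (verb_noun_type_training2 == "construction2" & scene_test2 == "construction1")
)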


round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, list(production_entrenchment_attested_unattested.df$verb_noun_type_training2, production_entrenchment_attested_unattested.df$experiment), mean),3)
##                exp1  exp2  exp3  exp4  exp5
## alternating      NA    NA    NA    NA    NA
## construction1 0.174 0.138 0.175 0.137 0.167
## construction2 0.122 0.107 0.112 0.109 0.167
## novel            NA    NA    NA    NA    NA
df_prod_ent = lizCenter(production_entrenchment_attested_unattested.df, list("verb_noun_type_training2","experiment"))


# weakly informative normal(0, 1) priors for the predictors and the intercept
prod_attested_unattested_ent = brm(
  formula = attested_unattested ~ verb_noun_type_training2.ct * experiment.ct +
    (1 + verb_noun_type_training2.ct | participant_private_id),
  data = df_prod_ent,
  family = bernoulli(link = logit),
  prior = c(prior(normal(0, 1), class = Intercept),
            prior(normal(0, 1), class = b)),
  cores = 4, warmup = 2000, iter = 5000, chains = 4,
  control = list(adapt_delta = 0.99)
)

posterior_summary(prod_attested_unattested_ent, variable = c("b_Intercept","b_verb_noun_type_training2.ct","b_experiment.ct","b_verb_noun_type_training2.ct:experiment.ct"))
##                                                  Estimate Est.Error       Q2.5
## b_Intercept                                 -3.6173262219 0.3379517 -4.3230440
## b_verb_noun_type_training2.ct               -0.4331151228 0.4028714 -1.2131739
## b_experiment.ct                             -0.0003029639 0.2126931 -0.4137416
## b_verb_noun_type_training2.ct:experiment.ct  0.1700293531 0.1824248 -0.1898127
##                                                  Q97.5
## b_Intercept                                 -3.0083434
## b_verb_noun_type_training2.ct                0.3767167
## b_experiment.ct                              0.4198792
## b_verb_noun_type_training2.ct:experiment.ct  0.5325602
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] > 0)
C2=mean(samps[,"b_verb_noun_type_training2.ct"] > 0)
C3=mean(samps[,"b_experiment.ct"] > 0)
C4=mean(samps[,"b_verb_noun_type_training2.ct:experiment.ct"] < 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.1383333
## 3         0.4986667
## 4         0.1719167
# The same analysis without the verb_noun_type_training2 predictor


# weakly informative normal(0, 1) prior for the intercept
prod_attested_unattested_ent_final = brm(
  formula = attested_unattested ~ experiment.ct + (1 | participant_private_id),
  data = df_prod_ent,
  family = bernoulli(link = logit),
  prior = set_prior("normal(0, 1)", class = "Intercept"),
  cores = 4, warmup = 2000, iter = 5000, chains = 4,
  control = list(adapt_delta = 0.99)
)

posterior_summary(prod_attested_unattested_ent_final, variable = c("b_Intercept","b_experiment.ct"))
##                    Estimate Est.Error       Q2.5     Q97.5
## b_Intercept     -3.42571272 0.3067878 -4.0646596 -2.867311
## b_experiment.ct -0.01569402 0.2066840 -0.4285101  0.388802
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] > 0)
C2=mean(samps[,"b_experiment.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2))
pMCMC
##   c(C1, C2)
## 1    0.0000
## 2    0.4675
# c. We now compare unattested productions for restricted vs. novel nouns:
# do participants produce the unwitnessed form less for the two non-alternating nouns than for the novel noun
# (for the novel noun the "unwitnessed" form has to be coded arbitrarily; see below)?

production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df <- subset(production_entrenchment_restricted_novel.df, verb_noun_type_training2 != "alternating")

# All forms are unwitnessed for the novel noun, so we arbitrarily code all det1 responses as attested (1) and all det2 responses as unattested (0)

production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_noun_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)

# Select trials featuring the novel noun in the intransitive inchoative construction
production_entrenchment_restricted_novel1.df <- subset(production_entrenchment_restricted_novel.df, verb_noun_type_training2 == "novel"  & scene_test2 == "construction2")


# Select trials featuring transitive nouns in the intransitive inchoative construction at test
production_entrenchment_restricted_novel2.df  <- subset(production_entrenchment_restricted_novel.df, verb_noun_type_training2 == "construction1" & scene_test2 == "construction2")

# Select trials featuring intransitive nouns in the transitive construction at test
production_entrenchment_restricted_novel3.df  <- subset(production_entrenchment_restricted_novel.df, verb_noun_type_training2 == "construction2" & scene_test2 == "construction1")


production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested, list(production_entrenchment_restricted_novel.df$verb_noun_type_training2, production_entrenchment_restricted_novel.df$experiment), mean),3)
##                exp1  exp2  exp3  exp4  exp5
## alternating      NA    NA    NA    NA    NA
## construction1 0.174 0.138 0.175 0.137 0.167
## construction2 0.122 0.107 0.112 0.109 0.167
## novel         0.036 0.051 0.051 0.077 0.060
# Reverse the coding to focus on unattested rather than attested productions for novel vs. restricted.
# Rebuild the data frame from its parts first, so the recoding below is applied exactly once.
production_entrenchment_restricted_novel.df <- rbind(production_entrenchment_restricted_novel1.df, production_entrenchment_restricted_novel2.df, production_entrenchment_restricted_novel3.df)
production_entrenchment_restricted_novel.df$attested_unattested<- recode(production_entrenchment_restricted_novel.df$attested_unattested, `1` = 0L, `0` = 1L)
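Since attested_unattested is a 0/1 variable, the same flip can be written arithmetically, as an equivalent alternative to recode():

# equivalent alternative (already applied above, so shown commented out):
# production_entrenchment_restricted_novel.df$attested_unattested <-
#   1 - production_entrenchment_restricted_novel.df$attested_unattested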


round(tapply(production_entrenchment_restricted_novel.df$attested_unattested, list(production_entrenchment_restricted_novel.df$verb_noun_type_training2, production_entrenchment_restricted_novel.df$experiment), mean),3)
##                exp1  exp2  exp3  exp4  exp5
## alternating      NA    NA    NA    NA    NA
## construction1 0.826 0.862 0.825 0.863 0.833
## construction2 0.878 0.893 0.887 0.891 0.833
## novel         0.964 0.949 0.949 0.923 0.940
# This means that participants produce *unattested* forms less often for the restricted nouns than for the novel noun

production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_verb_noun","experiment"))


# weakly informative normal(0, 1) priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(
  formula = attested_unattested ~ restricted_verb_noun.ct * experiment.ct +
    (1 + restricted_verb_noun.ct | participant_private_id),
  data = production_entrenchment_restricted_novel1.df,
  family = bernoulli(link = logit),
  prior = c(prior(normal(0, 1), class = Intercept),
            prior(normal(0, 1), class = b)),
  cores = 4, warmup = 2000, iter = 5000, chains = 4,
  control = list(adapt_delta = 0.99)
)

posterior_summary(prod_unattested_novel_ent_final, variable = c("b_Intercept","b_restricted_verb_noun.ct","b_experiment.ct","b_restricted_verb_noun.ct:experiment.ct"))
##                                            Estimate Est.Error       Q2.5
## b_Intercept                              3.67224310 0.2742683  3.1658129
## b_restricted_verb_noun.ct               -0.56834312 0.4565166 -1.4889358
## b_experiment.ct                         -0.03504953 0.1665256 -0.3662826
## b_restricted_verb_noun.ct:experiment.ct  0.17081306 0.2236560 -0.2710999
##                                             Q97.5
## b_Intercept                             4.2380380
## b_restricted_verb_noun.ct               0.3089309
## b_experiment.ct                         0.2948875
## b_restricted_verb_noun.ct:experiment.ct 0.6119883
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)

samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb_noun.ct"] > 0)
C3=mean(samps[,"b_experiment.ct"] > 0)
C4=mean(samps[,"b_restricted_verb_noun.ct:experiment.ct"] > 0)

pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
##   c(C1, C2, C3, C4)
## 1         0.0000000
## 2         0.1043333
## 3         0.4162500
## 4         0.7809167