library(psych)
library(plyr)
library(doBy)
library(cowplot)
library(reshape2)
library(lme4)
library(brms)
library(tidyr)
library(tidyverse)
library(data.table)
library(janitor)
library(yarrr)
library(knitr)
source("lmedrop.R")
source("myCenter.R")
source("lizCenter.R")
source("summarySEwithin.R")
source("summarySE.R")
source("normDataWithin.R")
source("BF.R")
source("Bf_range.R")
source("Bf_powercalc.R")
theme_set(theme_bw())
This function can be found on the website “Cookbook for R”.
http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions
It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence intervals (default 95%).
data: a data frame.
measurevar: the name of a column that contains the variable to be summarized
groupvars: a vector containing names of columns that contain grouping variables
na.rm: a boolean that indicates whether to ignore NA’s
conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                      conf.interval = .95, .drop = TRUE) {
  require(plyr)
  # New version of length which can handle NAs: if na.rm == TRUE, don't count them
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x))
    else length(x)
  }
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop = .drop,
                 .fun = function(xx, col) {
                   c(N = length2(xx[[col]], na.rm = na.rm),
                     mean = mean(xx[[col]], na.rm = na.rm),
                     sd = sd(xx[[col]], na.rm = na.rm))
                 },
                 measurevar)
  # Rename the "mean" column (namespaced, because dplyr's rename - loaded via
  # tidyverse - masks the plyr version this call relies on)
  datac <- plyr::rename(datac, c("mean" = measurevar))
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult
  return(datac)
}
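As a quick illustration (the toy data frame below is hypothetical and not part of the analyses that follow), summarySE can be called like this:
set.seed(1)
# hypothetical data: 2 groups x 10 observations, with one missing value
toy <- data.frame(group = rep(c("a", "b"), each = 10),
                  score = c(rnorm(10, mean = 5), rnorm(9, mean = 6), NA))
summarySE(toy, measurevar = "score", groupvars = "group", na.rm = TRUE)
# one row per group, with columns N, score (the mean), sd, se and ci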
This function can be found on the website “Cookbook for R”.
http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions
It summarizes data, handling within-subjects variables by removing inter-subject variability. It will still work if there are no within-subjects variables. It gives count, un-normed mean, normed mean (with the same between-group mean), standard deviation, standard error of the mean, and confidence intervals. If there are within-subjects variables, adjusted values are calculated using the method from Morey (2008).
data: a data frame.
measurevar: the name of a column that contains the variable to be summarized
betweenvars: a vector containing names of columns that are between-subjects variables
withinvars: a vector containing names of columns that are within-subjects variables
idvar: the name of a column that identifies each subject (or matched subjects)
na.rm: a boolean that indicates whether to ignore NA’s
conf.interval: the percent range of the confidence interval (default is 95%)
summarySEwithin <- function(data = NULL, measurevar, betweenvars = NULL, withinvars = NULL,
                            idvar = NULL, na.rm = FALSE, conf.interval = .95, .drop = TRUE) {
  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop = FALSE],
                       FUN = is.factor, FUN.VALUE = logical(1))
  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }
  # Get the means from the un-normed data
  datac <- summarySE(data, measurevar, groupvars = c(betweenvars, withinvars),
                     na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)
  # Drop all the unused columns (these will be calculated with normed data)
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL
  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop = .drop)
  # This is the name of the new column
  measurevar_n <- paste(measurevar, "_norm", sep = "")
  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars = c(betweenvars, withinvars),
                      na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)
  # Apply correction from Morey (2008) to the standard error and confidence interval
  # Get the product of the number of conditions of within-S variables
  nWithinGroups <- prod(vapply(ndatac[, withinvars, drop = FALSE], FUN = nlevels,
                               FUN.VALUE = numeric(1)))
  correctionFactor <- sqrt(nWithinGroups / (nWithinGroups - 1))
  # Apply the correction factor
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor
  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}
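A minimal sketch of how summarySEwithin might be called, again with hypothetical data (one within-subjects factor, no between-subjects factors):
set.seed(2)
# hypothetical data: 20 subjects each rated in two conditions
toy_w <- data.frame(subject = factor(rep(1:20, each = 2)),
                    cond = factor(rep(c("pre", "post"), times = 20)),
                    rating = rnorm(40, mean = 5))
summarySEwithin(toy_w, measurevar = "rating", withinvars = "cond",
                idvar = "subject")
# sd, se and ci come from the normed data with the Morey (2008) correction;
# the rating column holds the un-normed condition means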
This function is used by the summarySEwithin function above. It can be found on the website “Cookbook for R”.
http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions
From that website: Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.
data: a data frame
idvar: the name of a column that identifies each subject (or matched subjects)
measurevar: the name of a column that contains the variable to be summarized
betweenvars: a vector containing names of columns that are between-subjects variables
na.rm: a boolean that indicates whether to ignore NA’s
normDataWithin <- function(data = NULL, idvar, measurevar, betweenvars = NULL,
                           na.rm = FALSE, .drop = TRUE) {
  require(plyr)
  # Measure var on left, idvar + between vars on right of formula.
  data.subjMean <- ddply(data, c(idvar, betweenvars), .drop = .drop,
                         .fun = function(xx, col, na.rm) {
                           c(subjMean = mean(xx[, col], na.rm = na.rm))
                         },
                         measurevar,
                         na.rm)
  # Put the subject means with original data
  data <- merge(data, data.subjMean)
  # Get the normalized data in a new column
  measureNormedVar <- paste(measurevar, "_norm", sep = "")
  data[, measureNormedVar] <- data[, measurevar] - data[, "subjMean"] +
    mean(data[, measurevar], na.rm = na.rm)
  # Remove this subject mean column
  data$subjMean <- NULL
  return(data)
}
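To see what the norming does, here is a small hypothetical check: after normDataWithin, every subject has the same mean on the normed variable (the grand mean).
set.seed(3)
# hypothetical data: 5 subjects x 4 observations
toy_n <- data.frame(subject = factor(rep(1:5, each = 4)),
                    rating = rnorm(20, mean = 5))
normed <- normDataWithin(toy_n, idvar = "subject", measurevar = "rating")
tapply(normed$rating_norm, normed$subject, mean)  # identical for all subjects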
This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from Florian Jaeger’s blog https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.
From his blog:
-If the input is a numeric variable, the output is the centered variable.
-If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.
-If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).
myCenter <- function(x) {
  if (is.numeric(x)) { return(x - mean(x, na.rm = TRUE)) }
  if (is.factor(x)) {
    x <- as.numeric(x)
    return(x - mean(x, na.rm = TRUE))
  }
  if (is.data.frame(x) || is.matrix(x)) {
    m <- matrix(nrow = nrow(x), ncol = ncol(x))
    colnames(m) <- paste("c", colnames(x), sep = "")
    for (i in 1:ncol(x)) {
      m[, i] <- myCenter(x[, i])
    }
    return(as.data.frame(m))
  }
}
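Two illustrative calls (hypothetical inputs) showing what myCenter returns for a numeric vector and for a binary factor:
myCenter(c(1, 2, 3))                     # -1 0 1
myCenter(factor(c("a", "b", "b", "a")))  # -0.5 0.5 0.5 -0.5 (level 1 below 0)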
This function provides a wrapper around myCenter, allowing you to center a specific list of variables from a data frame. The input is a data frame (x) and a list of the names of the variables which you wish to center (listfname). The output is a copy of the data frame with a (numeric) column added for each of the centered variables, each labelled with its previous name with “.ct” appended. For example, if x is a data frame with columns “a” and “b”, lizCenter(x, list(“a”, “b”)) will return a data frame with two additional columns, a.ct and b.ct, which are numeric, centered codings of the corresponding variables.
lizCenter <- function(x, listfname) {
  for (i in 1:length(listfname)) {
    fname <- as.character(listfname[i])
    x[paste(fname, ".ct", sep = "")] <- myCenter(x[fname])
  }
  return(x)
}
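For example (hypothetical data frame), centering a binary factor and a numeric column:
ex <- data.frame(a = factor(c("x", "x", "y", "y")), b = c(1, 2, 3, 4))
lizCenter(ex, list("a", "b"))
# adds numeric columns a.ct (-0.5, -0.5, 0.5, 0.5) and b.ct (-1.5, -0.5, 0.5, 1.5)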
This function (get_coeffs) allows us to inspect particular coefficients from the output of an lme4 model by putting them in a table.
x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)
list: a list of the names of the coefficients to be extracted (e.g. c(“variable1”, “variable1:variable2”))
get_coeffs <- function(x, list) { as.data.frame(summary(x)$coefficients)[list, ] }
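A brief illustration using the sleepstudy data bundled with lme4 (not one of the models reported below):
m <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)
get_coeffs(m, c("(Intercept)", "Days"))  # rows of the fixed-effects table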
This function is equivalent to the Dienes (2008) calculator, which can be found here: http://www.lifesci.sussex.ac.uk/home/Zoltan_Dienes/inference/Bayes.htm.
The code was provided by Baguley and Kaye (2010) and can be found here: http://www.academia.edu/427288/Review_of_Understanding_psychology_as_a_science_An_introduction_to_scientific_and_statistical_inference
Bf <- function(sd, obtained, uniform, lower = 0, upper = 1,
               meanoftheory = 0, sdtheory = 1, tail = 1) {
  area <- 0
  if (identical(uniform, 1)) {
    # uniform prior on theta between lower and upper
    theta <- lower
    range <- upper - lower
    incr <- range / 2000
    for (A in -1000:1000) {
      theta <- theta + incr
      dist_theta <- 1 / range
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  } else {
    # normal prior on theta; if tail == 1, a half-normal truncated at 0
    theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (A in -1000:1000) {
      theta <- theta + incr
      dist_theta <- dnorm(theta, meanoftheory, sdtheory)
      if (identical(tail, 1)) {
        if (theta <= 0) {
          dist_theta <- 0
        } else {
          dist_theta <- dist_theta * 2
        }
      }
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
  }
  LikelihoodTheory <- area
  Likelihoodnull <- dnorm(obtained, 0, sd)
  BayesFactor <- LikelihoodTheory / Likelihoodnull
  ret <- list("LikelihoodTheory" = LikelihoodTheory,
              "Likelihoodnull" = Likelihoodnull,
              "BayesFactor" = BayesFactor)
  ret
}
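As an illustration with made-up numbers (an obtained effect of 0.3 with SE 0.1, tested against a half-normal H1 with SD 0.5):
Bf(sd = 0.1, obtained = 0.3, uniform = 0, sdtheory = 0.5, tail = 1)$BayesFactor
# a value well above 3, i.e. substantial evidence for H1 over the null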
This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, a value for the predicted mean, which is set to be sdtheory (with meanoftheory=0), and the current number of participants N). However, rather than returning a BF for the current sample, it works out what the BF would be for a range of different subject numbers (assuming that the SE scales with sqrt(N)).
Bf_powercalc <- function(sd, obtained, uniform, lower = 0, upper = 1, meanoftheory = 0,
                         sdtheory = 1, tail = 2, N, min, max) {
  x <- c(0)
  y <- c(0)
  # note: working out the difference between N and df (for the contrast between
  # two groups, this is 2; for contrasts where there are 4 groups this will be 3, etc.)
  for (newN in min:max) {
    B <- as.numeric(Bf(sd = sd * sqrt(N / newN), obtained, uniform, lower, upper,
                       meanoftheory, sdtheory, tail)[3])
    x <- append(x, newN)
    y <- append(y, B)
    output <- cbind(x, y)
  }
  output <- output[-1, ]
  return(output)
}
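A hypothetical call, asking how the BF for an obtained effect of 0.3 (SE 0.1, based on N = 30) would change for sample sizes between 10 and 50:
power_curve <- Bf_powercalc(sd = 0.1, obtained = 0.3, uniform = 0, sdtheory = 0.5,
                            tail = 1, N = 30, min = 10, max = 50)
head(power_curve)  # column x is the hypothetical N, column y the corresponding BF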
This works with the Bf function above. It requires the obtained mean and SE for the current sample and works out what the BF would be for a range of predicted means (which are set to be sdtheoryrange (with meanoftheory=0)).
Bf_range <- function(sd, obtained, meanoftheory = 0, sdtheoryrange, tail = 1) {
  x <- c(0)
  y <- c(0)
  for (sdi in sdtheoryrange) {
    # pass meanoftheory and tail by name: matched positionally, tail would fall
    # into Bf's 'lower' argument and silently be ignored
    B <- as.numeric(Bf(sd, obtained, uniform = 0, meanoftheory = meanoftheory,
                       sdtheory = sdi, tail = tail)[3])
    x <- append(x, sdi)
    y <- append(y, B)
    output <- cbind(x, y)
  }
  output <- output[-1, ]
  colnames(output) <- c("sdtheory", "BF")
  return(output)
}
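A hypothetical robustness-region check for the same made-up effect, scanning H1 SDs from 0.1 to 1:
rr <- Bf_range(sd = 0.1, obtained = 0.3, sdtheoryrange = seq(0.1, 1, by = 0.1))
data.frame(rr)  # columns sdtheory and BF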
# Experiment 1 - verb study with adults
#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp1_production_data.csv")
combined_judgment_data.df <- read.csv("exp1_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
#separately for entrenchment and preemption
#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)
# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced
entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)
#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct) #actually, all NAs here
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)
# Create columns that we will need to run production analyses in pre-emption
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced
preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)
preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)
#Figure 2
RQ1_graph_productions.df = subset(entrenchment_production.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))
# aggregated dataframe for means
aggregated.graph1 = aggregate(semantically_correct ~ verb_type_training2 + Participant.Private.ID, RQ1_graph_productions.df, FUN=mean)
aggregated.graph1 <- rename(aggregated.graph1, verb = verb_type_training2,
correct = semantically_correct)
yarrr::pirateplot(formula = correct ~ verb,
data = aggregated.graph1,
main = "",
theme=2,
point.o = .3,
gl.col = 'white',
ylab = "% semantically correct",
cex.lab = 1,
cex.axis = 1,
cex.names = 1,
yaxt = "n")
axis(2, at = seq(0, 1, by = 0.25), las=1)
abline(h = 0.50, lty = 2)
#1 alternating verb production
alternating_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "alternating")
#and filter out responses where participants said something other than det1 or det2
alternating_prod.df = subset(alternating_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
# aggregated dataframe for means
aggregated.means_alternating_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + Participant.Private.ID, alternating_prod.df, FUN=mean)
# average accuracy across trial types
round(mean(aggregated.means_alternating_prod.df$semantically_correct),3)
## [1] 0.945
# average accuracy separately for causative and inchoative scenes
round(tapply(aggregated.means_alternating_prod.df$semantically_correct, aggregated.means_alternating_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2
## 0.971 0.919
# maximally vague priors for the intercept and the predictors
a = lizCenter(alternating_prod.df, list("transitivity_test_scene2"))
alternating_model <-brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|Participant.Private.ID), data=a, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(alternating_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct" ))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.1723207 0.3542948 2.551599 3.9295528
## b_transitivity_test_scene2.ct -0.7626614 0.5106406 -1.780862 0.2221214
mcmc_plot(alternating_model, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(alternating_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.00000
## 2 0.06575
# no difference between construction 1 and construction 2
# Final model
# maximally vague priors for the intercept
alternating_model_final = brm(formula = semantically_correct~1 + (1|Participant.Private.ID), data=a, family = bernoulli(link = logit),set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(alternating_model_final, variable = c("b_Intercept"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.025849 0.3362604 2.451086 3.763533
mcmc_plot(alternating_model_final, variable = "b_Intercept", regex = TRUE)
samps = as.matrix(as.mcmc(alternating_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#2 novel verb production
novel_prod.df = subset(entrenchment_production.df, condition == "entrenchment" & verb_type_training2 == "novel")
#filter out responses where participants said something other than det1 or det2
novel_prod.df = subset(novel_prod.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
# aggregated dataframe for means
aggregated.means_novel_prod.df = aggregate(semantically_correct ~ transitivity_test_scene2 + Participant.Private.ID, novel_prod.df, FUN=mean)
# average accuracy across trial types
round(mean(aggregated.means_novel_prod.df$semantically_correct),3)
## [1] 0.955
# average accuracy separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_prod.df$semantically_correct, aggregated.means_novel_prod.df$transitivity_test_scene2, mean),3)
## construction1 construction2
## 0.946 0.964
b = lizCenter(novel_prod.df, list("transitivity_test_scene2"))
# maximally vague priors for the intercept and the predictors
novel_model <- brm(formula = semantically_correct~transitivity_test_scene2.ct + (1 + transitivity_test_scene2.ct|Participant.Private.ID), data=b, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(novel_model, variable = c("b_Intercept", "b_transitivity_test_scene2.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.48331886 0.3849779 2.786681 4.290036
## b_transitivity_test_scene2.ct 0.05728035 0.6079753 -1.151471 1.221468
mcmc_plot(novel_model, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(novel_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.0000000
## 2 0.4583333
# no difference between construction 1 and construction 2
# Final model
# maximally vague priors for the intercept
novel_model_final <- brm(formula = semantically_correct~1+ (1|Participant.Private.ID), data=b, family = bernoulli(link = logit), set_prior("normal(0,1)", class="Intercept"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
summary(novel_model_final, WAIC=T)
## Family: bernoulli
## Links: mu = logit
## Formula: semantically_correct ~ 1 + (1 | Participant.Private.ID)
## Data: b (Number of observations: 333)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~Participant.Private.ID (Number of levels: 42)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.04 0.44 0.19 1.97 1.00 3321 3984
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.24 0.36 2.61 4.01 1.00 7111 6927
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_plot(novel_model_final, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(novel_model_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0
#Figure 3
RQ1_graph_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & (verb_type_training2 == "alternating" | verb_type_training2 == "novel"))
# aggregated dataframe for means
aggregated.graph2 = aggregate(Response ~ verb_type_training2 + semantically_correct + Participant.Private.ID, RQ1_graph_judgments.df, FUN=mean)
aggregated.graph2$semantically_correct <- recode(aggregated.graph2$semantically_correct, "1" = "yes","0" = "no")
aggregated.graph2 <- rename(aggregated.graph2, verb = verb_type_training2,
correct = semantically_correct)
yarrr::pirateplot(formula = Response ~ correct + verb,
data = aggregated.graph2,
main = "",
theme=2,
point.o = .3,
gl.col = 'white',
ylab = "Rating",
cex.lab = 0.8,
cex.axis = 1,
cex.names = 0.8,
yaxt = "n")
axis(2, at = seq(1, 9, by = 1), las=1)
#1 alternating verb judgments
alternating_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "alternating")
# aggregated dataframe for means
aggregated.means_alternating_judgments = aggregate(Response ~ transitivity_test_scene2 + semantically_correct + Participant.Private.ID, alternating_judgments.df, FUN=mean)
aggregated.means_alternating_judgments$semantically_correct<- recode(aggregated.means_alternating_judgments$semantically_correct, "1" = "yes","0" = "no")
aggregated.means_alternating_judgments$transitivity_test_scene2<- recode(aggregated.means_alternating_judgments$transitivity_test_scene2, "construction1" = "transitive causative","construction2" = "intransitive inchoative")
# average ratings for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_alternating_judgments$Response, aggregated.means_alternating_judgments$semantically_correct, mean),3)
## no yes
## 2.506 4.837
# average ratings separately for causative and noncausative scenes
round(tapply(aggregated.means_alternating_judgments$Response, list(aggregated.means_alternating_judgments$semantically_correct, aggregated.means_alternating_judgments$transitivity_test_scene2), mean),3)
## transitive causative intransitive inchoative
## no 2.512 2.500
## yes 4.779 4.895
c = lizCenter(alternating_judgments.df, list("transitivity_test_scene2", "semantically_correct"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments <-brm(formula = Response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|Participant.Private.ID), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(alternating_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct", "b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
## Estimate Est.Error
## b_Intercept 3.68636373 0.08026067
## b_transitivity_test_scene2.ct 0.05044603 0.06748378
## b_semantically_correct.ct 2.29086054 0.12788451
## b_transitivity_test_scene2.ct:semantically_correct.ct 0.12472471 0.16271351
## Q2.5 Q97.5
## b_Intercept 3.52785324 3.8438775
## b_transitivity_test_scene2.ct -0.08124381 0.1824122
## b_semantically_correct.ct 2.03645849 2.5411248
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.19811459 0.4435577
mcmc_plot(alternating_model_judgments, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(alternating_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] < 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
## c(C1, C2, C3, C4)
## 1 0.0000000
## 2 0.2274167
## 3 0.0000000
## 4 0.2180000
# no difference between construction 1 and construction 2
# Final model
# maximally vague priors for the predictors (we don't interpret the intercept here)
alternating_model_judgments_final <-brm(formula = Response~semantically_correct.ct + (1 + semantically_correct.ct|Participant.Private.ID), data=c, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(alternating_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.686364 0.08026067 3.527853 3.843877
## b_semantically_correct.ct 2.290861 0.12788451 2.036458 2.541125
mcmc_plot(alternating_model_judgments_final, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(alternating_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0
## 2 0
#2 novel verb judgments
novel_judgments.df = subset(entrenchment_judgment.df, condition == "entrenchment" & verb_type_training2 == "novel")
# aggregated dataframe for means
aggregated.means_novel_judgments = aggregate(Response ~ transitivity_test_scene2 + semantically_correct + Participant.Private.ID, novel_judgments.df, FUN=mean)
# average ratings for semantically correct vs. incorrect trials across causative and noncausative trial types
round(tapply(aggregated.means_novel_judgments$Response, aggregated.means_novel_judgments$semantically_correct, mean),3)
## 0 1
## 2.250 4.174
# average ratings separately for causative and noncausative scenes
round(tapply(aggregated.means_novel_judgments$Response, list(aggregated.means_novel_judgments$semantically_correct, aggregated.means_novel_judgments$transitivity_test_scene2), mean),3)
## construction1 construction2
## 0 2.302 2.198
## 1 4.221 4.128
d = lizCenter(novel_judgments.df, list("transitivity_test_scene2", "semantically_correct"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
novel_model_judgments <-brm(formula = Response~transitivity_test_scene2.ct * semantically_correct.ct + (1 + transitivity_test_scene2.ct*semantically_correct.ct|Participant.Private.ID), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(novel_model_judgments, variable = c("b_Intercept", "b_transitivity_test_scene2.ct", "b_semantically_correct.ct", "b_transitivity_test_scene2.ct:semantically_correct.ct"))
## Estimate Est.Error
## b_Intercept 3.19995203 0.11181142
## b_transitivity_test_scene2.ct -0.09828225 0.09368539
## b_semantically_correct.ct 1.87153811 0.16294775
## b_transitivity_test_scene2.ct:semantically_correct.ct 0.01255795 0.19897327
## Q2.5 Q97.5
## b_Intercept 2.9779211 3.41579196
## b_transitivity_test_scene2.ct -0.2824359 0.08306548
## b_semantically_correct.ct 1.5402723 2.18074793
## b_transitivity_test_scene2.ct:semantically_correct.ct -0.3775237 0.40117231
mcmc_plot(novel_model_judgments, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(novel_model_judgments))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_transitivity_test_scene2.ct"] > 0)
C3=mean(samps[,"b_semantically_correct.ct"] < 0)
C4=mean(samps[,"b_transitivity_test_scene2.ct:semantically_correct.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2,C3,C4))
pMCMC
## c(C1, C2, C3, C4)
## 1 0.0000000
## 2 0.1466667
## 3 0.0000000
## 4 0.4761667
# no difference between construction 1 and construction 2
# Final model
# maximally vague priors for the predictors (we don't interpret the intercept here)
novel_model_judgments_final <-brm(formula = Response~semantically_correct.ct + (1 + semantically_correct.ct|Participant.Private.ID), data=d, family = gaussian(), set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(novel_model_judgments_final, variable = c("b_Intercept", "b_semantically_correct.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.686364 0.08026067 3.527853 3.843877
## b_semantically_correct.ct 2.290861 0.12788451 2.036458 2.541125
mcmc_plot(novel_model_judgments_final, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(novel_model_judgments_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_semantically_correct.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0
## 2 0
#Figure 4
#first, filter out semantically incorrect trials
judgments_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")
#we only want to keep novel
judgments_novel.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "novel")
#and restricted items
judgment_unattested_constr1.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")
judgment_unattested_constr2.df <- subset(judgments_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")
judgment_unattested_novel.df <- rbind(judgments_novel.df, judgment_unattested_constr1.df, judgment_unattested_constr2.df)
aggregated.means = aggregate(Response ~ condition + restricted_verb + Participant.Private.ID, judgment_unattested_novel.df, FUN=mean)
aggregated.means<- rename(aggregated.means, restricted = restricted_verb)
yarrr::pirateplot(formula = Response ~ restricted + condition,
data = aggregated.means,
main = "",
theme=2,
point.o = .3,
gl.col = 'white',
ylab = "Rating",
cex.lab = 0.8,
cex.axis = 1,
cex.names = 0.8,
yaxt = "n")
axis(2, at = seq(1, 9, by = 1), las=1)
round(tapply(judgment_unattested_novel.df$Response, judgment_unattested_novel.df$restricted_verb, mean),3)
## no yes
## 3.434 3.140
#Center variables of interest using the lizCenter function:
d_unattested_novel = lizCenter(judgment_unattested_novel.df, list("restricted_verb"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_preemption_model <- brm(formula = Response~(1 +restricted_verb.ct|Participant.Private.ID)+restricted_verb.ct, data=d_unattested_novel, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(judgments_preemption_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.5593592 0.1237609 3.3254207 3.8127584
## b_restricted_verb.ct -0.1302255 0.1291689 -0.3838593 0.1211288
mcmc_plot(judgments_preemption_model, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(judgments_preemption_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.00000
## 2 0.84525
# BF analyses: we use the difference between attested and unattested in the pilot study reported at rpubs.com/AnnaSamara/333562 as the maximum difference we could expect when comparing ratings for unattested vs. novel constructions (SD = 3.15/2)
Bf(0.17, 0.65, uniform = 0, meanoftheory = 0, sdtheory = 3.15/2, tail = 1)
## $LikelihoodTheory
## [1] 0.4629748
##
## $Likelihoodnull
## [1] 0.001570015
##
## $BayesFactor
## [1] 294.8856
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 0.65, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# RRs for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4
#first, filter out semantically incorrect trials
entrenchment_judgment_unattested_novel.df <- subset(entrenchment_judgment.df, semantically_correct == "1")
#we only want to keep novel
entrenchment_judgment_novel.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "novel")
#and restricted items
entrenchment_judgment_unattested_constr1.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")
entrenchment_judgment_unattested_constr2.df <- subset(entrenchment_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")
entrenchment_judgment_unattested_novel.df <- rbind(entrenchment_judgment_novel.df, entrenchment_judgment_unattested_constr1.df, entrenchment_judgment_unattested_constr2.df)
entrenchment_judgment_unattested_novel.df$restricted_verb <- factor(entrenchment_judgment_unattested_novel.df$restricted_verb , levels = c("yes", "no"))
round(tapply(entrenchment_judgment_unattested_novel.df$Response, entrenchment_judgment_unattested_novel.df$restricted_verb, mean),3)
## yes no
## 4.552 4.174
#Center variables of interest using the lizCenter function:
d_unattested_novel_entrenchment = lizCenter(entrenchment_judgment_unattested_novel.df , list("restricted_verb"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_entrenchment_model <- brm(formula = Response~(1 +restricted_verb.ct|Participant.Private.ID)+restricted_verb.ct, data=d_unattested_novel_entrenchment, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(judgments_entrenchment_model, variable = c("b_Intercept", "b_restricted_verb.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 4.3685696 0.1138122 4.1458594 4.59417035
## b_restricted_verb.ct -0.3666131 0.1662219 -0.6920101 -0.03670027
mcmc_plot(judgments_entrenchment_model, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(judgments_entrenchment_model))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)
# the effect is in the opposite direction
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.0000000
## 2 0.9859167
# drawing a max based on the difference between attested vs. unattested in this experiment
# BF analyses: we use the difference between attested and unattested in this study (attested > unattested provides supporting evidence for entrenchment) as a maximum we expect when comparing ratings for unattested vs. novel constructions (SD = 0.38/2)
Bf(0.16, -0.36, uniform = 0, meanoftheory = 0, sdtheory = 0.38/2, tail = 1)
## $LikelihoodTheory
## [1] 0.0482932
##
## $Likelihoodnull
## [1] 0.1983728
##
## $BayesFactor
## [1] 0.2434466
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.16, -0.36, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# RRs for which BF <1/3
ev_for_h0 <- subset(data.frame(range_test), BF < 1/3)
low_threshold <- min(ev_for_h0$sdtheory)
high_threshold <- max(ev_for_h0$sdtheory)
print(low_threshold)
## [1] 0
print(high_threshold)
## [1] 4
#first, filter out semantically incorrect trials
all_judgment_unattested_novel.df <- subset(combined_judgment_data.df, semantically_correct == "1")
#we only want to keep novel
all_judgment_novel.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "novel")
#and restricted items
all_judgment_unattested_constr1.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction1" & attested_unattested == "0")
all_judgment_unattested_constr2.df <- subset(all_judgment_unattested_novel.df, verb_type_training2 == "construction2" & attested_unattested == "0")
all_judgment_unattested_novel.df <- rbind(all_judgment_novel.df, all_judgment_unattested_constr1.df, all_judgment_unattested_constr2.df)
round(tapply(all_judgment_unattested_novel.df$Response, list(all_judgment_unattested_novel.df$restricted_verb, all_judgment_unattested_novel.df$condition), mean),3)
## entrenchment preemption
## no 4.174 3.026
## yes 4.552 2.362
#Center variables of interest using the lizCenter function:
df = lizCenter(all_judgment_unattested_novel.df, list("restricted_verb", "condition"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
judgments_pre_vs_ent_model <- brm(formula = Response~(1 +restricted_verb.ct|Participant.Private.ID)+restricted_verb.ct * condition.ct, data=df, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(judgments_pre_vs_ent_model, variable = c("b_Intercept", "b_restricted_verb.ct", "b_restricted_verb.ct:condition.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.2911753 0.07696888 3.1395119 3.44293845
## b_restricted_verb.ct -0.2812720 0.12084997 -0.5170055 -0.04227185
## b_restricted_verb.ct:condition.ct -0.9948664 0.22991094 -1.4444801 -0.54254173
mcmc_plot(judgments_pre_vs_ent_model, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(judgments_pre_vs_ent_model))
C1=mean(samps[,"b_restricted_verb.ct"] > 0)
C2=mean(samps[,"b_restricted_verb.ct:condition.ct"] > 0)
C3=mean(samps[,"b_Intercept"] > 0)
pMCMC=as.data.frame(c(C1,C2,C3))
pMCMC
## c(C1, C2, C3)
## 1 0.01025
## 2 0.00000
## 3 1.00000
#roughly predicted effect size from adult pilot study was 2.91. Use it as max for unattested vs. novel (SD = 2.91/2)
Bf(0.22, 0.99, uniform = 0, meanoftheory = 0, sdtheory = 2.91/2, tail = 1)
## $LikelihoodTheory
## [1] 0.4323971
##
## $Likelihoodnull
## [1] 7.265337e-05
##
## $BayesFactor
## [1] 5951.508
H1RANGE = seq(0,4,by=0.01) # [5-1]-[0] - max effect of preemption minus no effect of entrenchment
range_test <- Bf_range(0.22, 0.99, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4
# Figure
judgments_unattested_attested.df <- subset(combined_judgment_data.df, semantically_correct == "1")
judgments_unattested_attested.df <- subset(judgments_unattested_attested.df, restricted_verb == "yes")
aggregated.means1 = aggregate(Response ~ condition + attested_unattested + Participant.Private.ID, judgments_unattested_attested.df , FUN=mean)
aggregated.means1<- rename(aggregated.means1, attested = attested_unattested)
aggregated.means1$attested<- recode(aggregated.means1$attested, "1" = "yes","0" = "no")
yarrr::pirateplot(formula = Response ~ attested + condition,
data = aggregated.means1,
main = "",
theme=2,
point.o = .3,
gl.col = 'white',
ylab = "Rating",
cex.lab = 0.8,
cex.axis = 1,
cex.names = 0.8,
yaxt = "n")
axis(2, at = seq(1, 9, by = 1), las=1)
# analyses
attested_vs_unattested = subset(preemption_judgment.df, restricted_verb == "yes" & semantically_correct == "1")
round(tapply(attested_vs_unattested$Response, attested_vs_unattested$attested_unattested, mean),3)
## 0 1
## 2.362 4.952
#Center variables of interest using the lizCenter function:
d_attested_unattested = lizCenter(attested_vs_unattested , list("attested_unattested"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_preemption <- brm(formula =Response~(1 +attested_unattested.ct|Participant.Private.ID)+attested_unattested.ct, data=d_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(attested_unattested_preemption, variable = c("b_Intercept", "b_attested_unattested.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 3.672064 0.05918382 3.554625 3.789804
## b_attested_unattested.ct 2.553816 0.12070388 2.312894 2.789910
mcmc_plot(attested_unattested_preemption, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(attested_unattested_preemption))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0
## 2 0
# this prior (5.00-1.85 = 3.15) is drawn from previous pilot study with 10 adults in preemption that was preregistered at https://rpubs.com/AnnaSamara/333562
Bf(0.12, 2.55, uniform = 0, meanoftheory = 0, sdtheory = 3.15, tail = 1)
## $LikelihoodTheory
## [1] 0.1824811
##
## $Likelihoodnull
## [1] 2.92535e-98
##
## $BayesFactor
## [1] 6.237925e+96
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.12, 2.55, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.01
print(high_threshold)
## [1] 4
attested_vs_unattested_ent = subset(entrenchment_judgment.df, restricted_verb == "yes" & semantically_correct == "1")
round(tapply(attested_vs_unattested_ent$Response, attested_vs_unattested_ent$attested_unattested, mean),3)
## 0 1
## 4.552 4.942
#Center variables of interest using the lizCenter function:
d_attested_unattested_ent = lizCenter(attested_vs_unattested_ent, list("attested_unattested"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment <- brm(formula =Response~(1 +attested_unattested.ct|Participant.Private.ID)+attested_unattested.ct, data=d_attested_unattested_ent, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(attested_unattested_entrenchment, variable = c("b_Intercept", "b_attested_unattested.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 4.7509762 0.06219752 4.6302269 4.8738085
## b_attested_unattested.ct 0.3809755 0.12537524 0.1339973 0.6251299
mcmc_plot(attested_unattested_entrenchment, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(attested_unattested_entrenchment))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.00000
## 2 0.00225
# we preregistered that the max effect of entrenchment here would be 1 based on adult data suggesting difference never more than 1 with novel verbs, i.e. SD = 0.5
Bf(0.13, 0.38, uniform = 0, meanoftheory = 0, sdtheory = 0.5, tail = 1)
## $LikelihoodTheory
## [1] 1.175537
##
## $Likelihoodnull
## [1] 0.04281328
##
## $BayesFactor
## [1] 27.45731
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.13, 0.38, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.06
print(high_threshold)
## [1] 4
attested_vs_unattested_across = subset(combined_judgment_data.df, restricted_verb == "yes" & semantically_correct == "1")
round(tapply(attested_vs_unattested_across$Response, list(attested_vs_unattested_across$condition, attested_vs_unattested_across$attested_unattested), mean),3)
## 0 1
## entrenchment 4.552 4.942
## preemption 2.362 4.952
#Center variables of interest using the lizCenter function:
df_attested_unattested = lizCenter(attested_vs_unattested_across, list("attested_unattested", "condition"))
# maximally vague priors for the predictors (we don't interpret the intercept here)
attested_unattested_entrenchment_preemption <- brm(formula = Response~(1 +attested_unattested.ct|Participant.Private.ID)+attested_unattested.ct * condition.ct, data=df_attested_unattested, family=gaussian(),set_prior("normal(0,1)", class="b"),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
posterior_summary(attested_unattested_entrenchment_preemption, variable = c("b_Intercept","b_condition.ct", "b_attested_unattested.ct","b_attested_unattested.ct:condition.ct"))
## Estimate Est.Error Q2.5 Q97.5
## b_Intercept 4.056252 0.04429631 3.969651 4.1431523
## b_condition.ct -1.051687 0.08427545 -1.217628 -0.8882511
## b_attested_unattested.ct 1.781098 0.09039936 1.603493 1.9597006
## b_attested_unattested.ct:condition.ct 2.113711 0.17127497 1.773577 2.4418378
mcmc_plot(attested_unattested_entrenchment_preemption, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(attested_unattested_entrenchment_preemption))
C1=mean(samps[,"b_Intercept"] < 0)
C1=mean(samps[,"b_condition.ct"] < 0)
C2=mean(samps[,"b_attested_unattested.ct"] < 0)
C3=mean(samps[,"b_attested_unattested.ct:condition.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2,C3))
pMCMC
## c(C1, C2, C3)
## 1 1
## 2 0
## 3 0
#roughly predicted effect size from adult pilot study: 2.91
Bf(0.17, 2.11, uniform = 0, meanoftheory = 0, sdtheory = 2.91, tail = 1)
## $LikelihoodTheory
## [1] 0.210635
##
## $Likelihoodnull
## [1] 8.289253e-34
##
## $BayesFactor
## [1] 2.541061e+32
H1RANGE = seq(0,4,by=0.01)
range_test <- Bf_range(0.17, 2.11, meanoftheory=0, sdtheoryrange= H1RANGE, tail=1)
# find values for which BF > 3
ev_for_h1 <- subset(data.frame(range_test), BF > 3)
low_threshold <- min(ev_for_h1$sdtheory)
high_threshold <- max(ev_for_h1$sdtheory)
print(low_threshold)
## [1] 0.02
print(high_threshold)
## [1] 4
data_long_e <- gather(entrenchment_production.df, det_type, produced, det1:none, factor_key=TRUE)
data_long_e$transitivity_test_scene2 <-recode(data_long_e$transitivity_test_scene2, "construction1" = "test: transitive causative","construction2" = "test: intransitive inchoative")
p = ggplot(data_long_e, aes(x = verb_type_training2, y = produced, fill = det_type)) +
geom_bar(stat = "identity", position = "fill") +
theme(panel.grid.major = element_blank()) +
facet_grid("transitivity_test_scene2") +
theme(panel.grid.minor = element_blank()) +
scale_fill_manual(values=c("grey", "grey15", "azure3","azure4"), name="particle", labels=c("transitive causative", "intransitive inchoative", "other", "none")) +
scale_x_discrete(labels=c("alternating" = "alternating", "construction1" = "transitive causative", "construction2" = "intransitive inchoative", "novel" = "novel")) +
ylab("proportion produced") +
xlab("verb type at training")
p
#a. Are participants producing more attested than unattested dets? We now compare the proportion of attested dets (the intercept) for the restricted verbs against chance
production_entrenchment_attested_unattested.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_attested_unattested.df <- subset(production_entrenchment_attested_unattested.df, restricted_verb =="yes")
#How much of the time are participants producing attested items?
round(mean(production_entrenchment_attested_unattested.df$attested_unattested),3)
## [1] 0.567
# and separately for each verb type
round(tapply(production_entrenchment_attested_unattested.df$attested_unattested, production_entrenchment_attested_unattested.df$verb_type_training2, mean),3)
## construction1 construction2
## 0.578 0.555
production_entrenchment_attested_unattested.df$verb_type_training2 <- factor(production_entrenchment_attested_unattested.df$verb_type_training2)
df_prod_ent = lizCenter((production_entrenchment_attested_unattested.df), list("verb_type_training2"))
# maximally vague priors for the predictors and the intercept
prod_attested_unattested_ent = brm(formula = attested_unattested ~verb_type_training2.ct + (1 + verb_type_training2.ct|Participant.Private.ID), data=df_prod_ent, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)),cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
summary(prod_attested_unattested_ent, WAIC=T)
## Family: bernoulli
## Links: mu = logit
## Formula: attested_unattested ~ verb_type_training2.ct + (1 + verb_type_training2.ct | Participant.Private.ID)
## Data: df_prod_ent (Number of observations: 688)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~Participant.Private.ID (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 0.31 0.15 0.03 0.61 1.00
## sd(verb_type_training2.ct) 0.14 0.11 0.01 0.41 1.00
## cor(Intercept,verb_type_training2.ct) -0.09 0.58 -0.96 0.94 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 3048 3890
## sd(verb_type_training2.ct) 8003 4963
## cor(Intercept,verb_type_training2.ct) 13752 7680
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 0.27 0.09 0.09 0.46 1.00 9326
## verb_type_training2.ct -0.10 0.16 -0.40 0.21 1.00 15336
## Tail_ESS
## Intercept 8780
## verb_type_training2.ct 8349
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_plot(prod_attested_unattested_ent, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(prod_attested_unattested_ent))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_verb_type_training2.ct"] > 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.002333333
## 2 0.268583333
#same analyses without verb_training_type
# maximally vague priors for the intercept
prod_attested_unattested_ent_final = brm(formula = attested_unattested ~1 + (1|Participant.Private.ID), data=df_prod_ent, family = bernoulli(link = logit), set_prior("normal(0, 1)", class = "Intercept"), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
summary(prod_attested_unattested_ent_final, WAIC=T)
## Family: bernoulli
## Links: mu = logit
## Formula: attested_unattested ~ 1 + (1 | Participant.Private.ID)
## Data: df_prod_ent (Number of observations: 688)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~Participant.Private.ID (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.30 0.15 0.03 0.59 1.00 2573 3474
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.27 0.09 0.10 0.46 1.00 9138 7770
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_plot(prod_attested_unattested_ent_final, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(prod_attested_unattested_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C1
## [1] 0.001333333
# c. we will now compare unattested for restricted vs. novel
# Do participants produce the unwitnessed form less for the 2 non-alternating verbs than for the novel verb (presumably the “unwitnessed” form has to be set arbitrarily here)
production_entrenchment_restricted_novel.df <- subset(entrenchment_production.df, det_lenient_adapted == "det_construction1" | det_lenient_adapted == "det_construction2")
production_entrenchment_restricted_novel.df<- subset(production_entrenchment_restricted_novel.df, verb_type_training2 != "alternating")
# all forms are unwitnessed for the novel verb so we are going to arbitrarily set all det1s as attested and all det2s as unattested
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction1", 1, production_entrenchment_restricted_novel.df$attested_unattested)
production_entrenchment_restricted_novel.df$attested_unattested <- ifelse(production_entrenchment_restricted_novel.df$verb_type_training2 == "novel" & production_entrenchment_restricted_novel.df$det_lenient_adapted == "det_construction2", 0, production_entrenchment_restricted_novel.df$attested_unattested)
round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$verb_type_training2, mean),3)
## construction1 construction2 novel
## 0.578 0.555 0.489
# proportion of attested items for each verb type - we will now compare constr1/2 vs. novel
round(tapply(production_entrenchment_restricted_novel.df$attested_unattested , production_entrenchment_restricted_novel.df$restricted_verb, mean),3)
## no yes
## 0.489 0.567
production_entrenchment_restricted_novel.df$restricted_verb <- factor(production_entrenchment_restricted_novel.df$restricted_verb)
production_entrenchment_restricted_novel1.df = lizCenter(production_entrenchment_restricted_novel.df, list("restricted_verb"))
# maximally vague priors for the predictors and the intercept
prod_unattested_novel_ent_final = brm(formula = attested_unattested ~restricted_verb.ct + (1 + restricted_verb.ct|Participant.Private.ID), data=production_entrenchment_restricted_novel1.df, family = bernoulli(link = logit), prior = c(prior(normal(0, 1), class = Intercept), prior(normal(0, 1), class = b)), cores=4, warmup = 2000, iter=5000, chains=4, control=list(adapt_delta = 0.99))
summary(prod_unattested_novel_ent_final, WAIC=T)
## Family: bernoulli
## Links: mu = logit
## Formula: attested_unattested ~ restricted_verb.ct + (1 + restricted_verb.ct | Participant.Private.ID)
## Data: production_entrenchment_restricted_novel1.df (Number of observations: 1021)
## Draws: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~Participant.Private.ID (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 0.13 0.09 0.01 0.33 1.00
## sd(restricted_verb.ct) 0.34 0.21 0.02 0.80 1.00
## cor(Intercept,restricted_verb.ct) 0.45 0.51 -0.82 0.99 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 3094 5041
## sd(restricted_verb.ct) 3212 5153
## cor(Intercept,restricted_verb.ct) 4241 6846
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.17 0.07 0.04 0.30 1.00 16646 9481
## restricted_verb.ct 0.31 0.15 0.02 0.61 1.00 14606 8415
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_plot(prod_unattested_novel_ent_final, variable = "^b_", regex = TRUE)
samps = as.matrix(as.mcmc(prod_unattested_novel_ent_final))
C1=mean(samps[,"b_Intercept"] < 0)
C2=mean(samps[,"b_restricted_verb.ct"] < 0)
pMCMC=as.data.frame(c(C1,C2))
pMCMC
## c(C1, C2)
## 1 0.00550000
## 2 0.01516667
# Experiment 2
#Create the dataframes that we will be working on
combined_production_data.df <- read.csv("exp2_production_data.csv")
combined_judgment_data.df <- read.csv("exp2_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
#separately for entrenchment and preemption
#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)
# Create columns that we will need to run production analyses in entrenchment
# We want some columns coding which of particle 1, particle 2, 'other' and 'none' was produced
entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)
#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct) # note: this column is entirely NA in the preemption data
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)
# Create the columns needed for the production analyses in the preemption condition
# These indicator columns code which of particle 1, particle 2, 'other', or 'none' was produced
preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)
preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)
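The same recoding steps are repeated verbatim for Experiments 3 and 4 below. If desired, the four indicator columns could be created by a single helper; a minimal sketch, assuming only the column names used above (add_det_columns is a hypothetical name, not part of the original scripts):
add_det_columns <- function(df) {
  # Indicator (0/1) columns for which of the four response categories was produced
  df$det1  <- ifelse(df$det_lenient_adapted == "det_construction1", 1, 0)
  df$det2  <- ifelse(df$det_lenient_adapted == "det_construction2", 1, 0)
  df$other <- ifelse(df$det_lenient_adapted == "other", 1, 0)
  df$none  <- ifelse(df$det_lenient_adapted == "none", 1, 0)
  df
}
# e.g. entrenchment_production.df <- add_det_columns(entrenchment_production.df)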
# Create the data frames for Experiment 3
combined_production_data.df <- read.csv("exp3_production_data.csv")
combined_judgment_data.df <- read.csv("exp3_judgment_data.csv")
combined_judgment_data.df$restricted_verb <- factor(combined_judgment_data.df$restricted_verb)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
# Prepare the entrenchment and preemption conditions separately
#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$transitivity_test_scene2 <- factor(entrenchment_production.df$transitivity_test_scene2)
# Create the columns needed for the production analyses in the entrenchment condition
# These indicator columns code which of particle 1, particle 2, 'other', or 'none' was produced
entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$transitivity_test_scene2 <- factor(entrenchment_judgment.df$transitivity_test_scene2)
entrenchment_judgment.df$restricted_verb <- factor(entrenchment_judgment.df$restricted_verb)
#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct) # note: this column is entirely NA in the preemption data
preemption_production.df$transitivity_test_scene2 <- factor(preemption_production.df$transitivity_test_scene2)
# Create the columns needed for the production analyses in the preemption condition
# These indicator columns code which of particle 1, particle 2, 'other', or 'none' was produced
preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)
preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$transitivity_test_scene2 <- factor(preemption_judgment.df$transitivity_test_scene2)
preemption_judgment.df$restricted_verb <- factor(preemption_judgment.df$restricted_verb)
# Create the data frames for Experiment 4
combined_production_data.df <- read.csv("exp4_production_data.csv")
combined_judgment_data.df <- read.csv("exp4_judgment_data.csv")
combined_judgment_data.df$restricted_noun <- factor(combined_judgment_data.df$restricted_noun)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
# Prepare the entrenchment and preemption conditions separately
#entrenchment
entrenchment_production.df <- subset(combined_production_data.df, condition == "entrenchment")
entrenchment_production.df$semantically_correct <- as.numeric(entrenchment_production.df$semantically_correct)
entrenchment_production.df$noun_type_test <- factor(entrenchment_production.df$noun_type_test)
# Create the columns needed for the production analyses in the entrenchment condition
# These indicator columns code which of particle 1, particle 2, 'other', or 'none' was produced
entrenchment_production.df$det1 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction1", 1, 0)
entrenchment_production.df$det2 <- ifelse(entrenchment_production.df$det_lenient_adapted == "det_construction2", 1, 0)
entrenchment_production.df$other <- ifelse(entrenchment_production.df$det_lenient_adapted == "other", 1, 0)
entrenchment_production.df$none <- ifelse(entrenchment_production.df$det_lenient_adapted == "none", 1, 0)
entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$noun_type_test <- factor(entrenchment_judgment.df$noun_type_test)
entrenchment_judgment.df$restricted_noun <- factor(entrenchment_judgment.df$restricted_noun)
#preemption
preemption_production.df <- subset(combined_production_data.df, condition == "preemption")
preemption_production.df$semantically_correct <- as.numeric(preemption_production.df$semantically_correct) # note: this column is entirely NA in the preemption data
preemption_production.df$noun_type_test <- factor(preemption_production.df$noun_type_test)
# Create the columns needed for the production analyses in the preemption condition
# These indicator columns code which of particle 1, particle 2, 'other', or 'none' was produced
preemption_production.df$det1 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction1", 1, 0)
preemption_production.df$det2 <- ifelse(preemption_production.df$det_lenient_adapted == "det_construction2", 1, 0)
preemption_production.df$other <- ifelse(preemption_production.df$det_lenient_adapted == "other", 1, 0)
preemption_production.df$none <- ifelse(preemption_production.df$det_lenient_adapted == "none", 1, 0)
preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$noun_type_test <- factor(preemption_judgment.df$noun_type_test)
preemption_judgment.df$restricted_noun <- factor(preemption_judgment.df$restricted_noun)
# Create the data frames for Experiment 5 (judgment data only)
combined_judgment_data.df <- read.csv("exp5_judgment_data.csv")
combined_judgment_data.df$restricted_noun <- factor(combined_judgment_data.df$restricted_noun)
combined_judgment_data.df$condition <- factor(combined_judgment_data.df$condition)
# Prepare the entrenchment and preemption conditions separately
#entrenchment
entrenchment_judgment.df <- subset(combined_judgment_data.df, condition == "entrenchment")
entrenchment_judgment.df$semantically_correct <- factor(entrenchment_judgment.df$semantically_correct)
entrenchment_judgment.df$noun_type_test <- factor(entrenchment_judgment.df$noun_type_test)
entrenchment_judgment.df$restricted_noun <- factor(entrenchment_judgment.df$restricted_noun)
#preemption
preemption_judgment.df <- subset(combined_judgment_data.df, condition == "preemption")
preemption_judgment.df$semantically_correct <- factor(preemption_judgment.df$semantically_correct)
preemption_judgment.df$noun_type_test <- factor(preemption_judgment.df$noun_type_test)
preemption_judgment.df$restricted_noun <- factor(preemption_judgment.df$restricted_noun)
# TODO: missing-data checks (the production data frames still need to be added before these analyses can be run)
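As a starting point for the missing-data check flagged above, NA counts per column can be tallied with base R once the production data frames are in place; a minimal sketch using the judgment data frames already loaded:
colSums(is.na(entrenchment_judgment.df)) # NA count per column, entrenchment
colSums(is.na(preemption_judgment.df))   # NA count per column, preemption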