SimData <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Exp4Sim.csv")
Exp1 <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/Exp4Clean.csv")
We’ll do the analysis in the same order as in the paper.
So we start with Simulation 1
SimData$Author <- factor(SimData$Author)
SimData$Systematicity <- factor(SimData$Systematicity)
SimData$Phonology <- factor(SimData$Phonology)
SimData$BlockMinus <- SimData$Block - 1
SimData$BlockMinus <- as.numeric(SimData$BlockMinus)
SimData$MonCatCorr <- as.numeric(SimData$MonCatCorr)
SimData$IndCorr <- as.numeric(SimData$IndCorr)
SimData <- droplevels.data.frame(SimData)
str(SimData)
## 'data.frame': 15360 obs. of 24 variables:
## $ Author : Factor w/ 2 levels "M","N": 2 2 2 2 2 2 2 2 2 2 ...
## $ Systematicity: Factor w/ 2 levels "A","S": 1 1 1 1 1 1 1 1 1 1 ...
## $ Phonology : Factor w/ 2 levels "D","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ Condition : Factor w/ 8 levels "MAD","MAP","MSD",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Within : num 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 ...
## $ Between : num 6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 ...
## $ PhonWithin : num NA NA NA NA NA NA NA NA NA NA ...
## $ PhonBetween : num NA NA NA NA NA NA NA NA NA NA ...
## $ Simnum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ SimID : Factor w/ 320 levels "MAD1","MAD10",..: 201 201 201 201 201 201 201 201 201 201 ...
## $ Block : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LexItemNum : int 2 1 7 11 8 6 9 12 3 10 ...
## $ OutputCat : num 0.25 0.25 0.75 0.75 0.75 0.25 0.75 0.75 0.25 0.75 ...
## $ DiffOutputcat: num 0.197 0.197 0.197 0.235 0.197 ...
## $ IndError : int 1 1 0 0 1 1 1 1 1 1 ...
## $ MonCatError : int 1 1 0 0 0 1 0 0 1 0 ...
## $ DCatError : int 1 1 0 0 0 1 0 0 1 0 ...
## $ CatDiff : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ IndCorr : num 0 0 1 1 0 0 0 0 0 0 ...
## $ MonCatCorr : num 0 0 1 1 1 0 1 1 0 1 ...
## $ DCatCorr : int 0 0 1 1 1 0 1 1 0 1 ...
## $ MonTrueCat : int 0 0 0 0 1 0 1 1 0 1 ...
## $ DTrueCat : int 0 0 0 0 1 0 1 1 0 1 ...
## $ BlockMinus : num 0 0 0 0 0 0 0 0 0 0 ...
#Simulation 1 uses only Monaghan and only compares Systematic to Arbitrary languages (no Phonological Dispersion, no Nielsen Phonemes)
Sim1Data <- subset(SimData, Author == "M")
Sim1Data <- subset(Sim1Data, Phonology == "P")
Sim1.afex.C <- mixed(MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus|SimID),
data = Sim1Data,
family = binomial,
control = glmerControl(optimizer = "bobyqa"),
method = 'LRT',
progress = FALSE)
## Contrasts set to contr.sum for the following variables: Systematicity, SimID
## Numerical variables NOT centered on 0 (i.e., interpretation of all main effects might be difficult if in interactions): BlockMinus
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 2 negative
## eigenvalues
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
summary(Sim1.afex.C) #Summary Table
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 2437.1 2480.9 -1211.6 2423.1 3833
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.67435 0.00003 0.00003 0.59725 0.83635
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## SimID (Intercept) 0.000e+00 0.000e+00
## BlockMinus 1.337e-14 1.156e-07 NaN
## Number of obs: 3840, groups: SimID, 80
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.4617 558.1661 0.019 0.985
## Systematicity1 -10.1043 558.1661 -0.018 0.986
## BlockMinus 0.1122 298.3519 0.000 1.000
## Systematicity1:BlockMinus 0.1122 298.3519 0.000 1.000
##
## Correlation of Fixed Effects:
## (Intr) Systm1 BlckMn
## Systemtcty1 -1.000
## BlockMinus -0.802 0.802
## Systmtc1:BM 0.802 -0.802 -1.000
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 2 negative eigenvalues
Sim1.afex.C$anova_table # Read Results from Here
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Df full model: 7
## Df Chisq Chi Df Pr(>Chisq)
## Systematicity 6 197.46 1 <2e-16 ***
## BlockMinus 6 0.00 1 0.9999
## Systematicity:BlockMinus 6 0.00 1 0.9999
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(MonCatCorr ~ Systematicity, data= Sim1Data, f1))
## Systematicity MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 A 0.6645833 0.4722589 0.07467069
## 2 S 1.0000000 0.0000000 0.00000000
do.call(data.frame, aggregate(MonCatCorr ~ Block, data= Sim1Data, f1))
## Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 1 0.8031250 0.3978443 0.06290471
## 2 2 0.8093750 0.3929987 0.06213855
## 3 3 0.8416667 0.3652435 0.05775008
## 4 4 0.8750000 0.3308913 0.05231851
do.call(data.frame, aggregate(MonCatCorr ~ Systematicity * Block, data= Sim1Data, f1))
## Systematicity Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 A 1 0.6062500 0.4890903 0.07733196
## 2 S 1 1.0000000 0.0000000 0.00000000
## 3 A 2 0.6187500 0.4862005 0.07687505
## 4 S 2 1.0000000 0.0000000 0.00000000
## 5 A 3 0.6833333 0.4656615 0.07362755
## 6 S 3 1.0000000 0.0000000 0.00000000
## 7 A 4 0.7500000 0.4334645 0.06853675
## 8 S 4 1.0000000 0.0000000 0.00000000
Sim1.afex.I <- mixed(IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus|SimID),
data = Sim1Data,
family = binomial,
control = glmerControl(optimizer = "bobyqa"),
method = 'LRT',
progress = FALSE)
## Contrasts set to contr.sum for the following variables: Systematicity, SimID
## Numerical variables NOT centered on 0 (i.e., interpretation of all main effects might be difficult if in interactions): BlockMinus
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning: contrasts dropped from factor SimID due to missing levels
## Warning: contrasts dropped from factor SimID due to missing levels
summary(Sim1.afex.I) #Summary Table
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 4657.2 4701.0 -2321.6 4643.2 3833
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5226 -0.7126 -0.4947 0.9378 2.7694
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## SimID (Intercept) 4.19e-02 0.204699
## BlockMinus 5.09e-05 0.007135 1.00
## Number of obs: 3840, groups: SimID, 80
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.33307 0.06982 -19.094 < 2e-16 ***
## Systematicity1 -0.45735 0.06943 -6.587 4.48e-11 ***
## BlockMinus 0.48044 0.03306 14.532 < 2e-16 ***
## Systematicity1:BlockMinus -0.01152 0.03297 -0.349 0.727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Systm1 BlckMn
## Systemtcty1 0.190
## BlockMinus -0.783 -0.151
## Systmtc1:BM -0.150 -0.781 0.122
Sim1.afex.I$anova_table # Read Results from Here
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Df full model: 7
## Df Chisq Chi Df Pr(>Chisq)
## Systematicity 6 39.8939 1 2.681e-10 ***
## BlockMinus 6 154.2932 1 < 2.2e-16 ***
## Systematicity:BlockMinus 6 0.1219 1 0.727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(IndCorr ~ Systematicity, data= Sim1Data, f1))
## Systematicity IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 A 0.2661458 0.4420566 0.06989528
## 2 S 0.4682292 0.4991196 0.07891774
do.call(data.frame, aggregate(IndCorr ~ Block, data= Sim1Data, f1))
## Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 1 0.2104167 0.4078170 0.06448154
## 2 2 0.3197917 0.4666388 0.07378207
## 3 3 0.4229167 0.4942799 0.07815252
## 4 4 0.5156250 0.5000163 0.07905952
do.call(data.frame, aggregate(IndCorr ~ Systematicity * Block, data= Sim1Data, f1))
## Systematicity Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 A 1 0.1437500 0.3512021 0.05552992
## 2 S 1 0.2770833 0.4480249 0.07083896
## 3 A 2 0.2166667 0.4124034 0.06520670
## 4 S 2 0.4229167 0.4945378 0.07819330
## 5 A 3 0.2958333 0.4568926 0.07224107
## 6 S 3 0.5500000 0.4980128 0.07874273
## 7 A 4 0.4083333 0.4920382 0.07779807
## 8 S 4 0.6229167 0.4851617 0.07671080
Now with those stats in hand we can output some graphs
#Aggregate Data
Sim1Ind <- aggregate(IndCorr ~ SimID * Block * Systematicity, data = Sim1Data, mean)
Sim1Cat <- aggregate(MonCatCorr ~ SimID * Block * Systematicity, data = Sim1Data, mean)
#Add a coding colum
Sim1Ind$Type <- "Individuation"
Sim1Cat$Type <- "Categorisation"
#Give columns the same names
colnames(Sim1Ind) <- c("SimID", "Block", "Systematicity", "Correctness", "Type")
colnames(Sim1Cat) <- c("SimID", "Block", "Systematicity", "Correctness", "Type")
#Rowbind into a single data frame
Sim1Agg <- rbind(Sim1Ind, Sim1Cat)
#Rename some stuff
Sim1Agg$Systematicity <- revalue(Sim1Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
ggplot(data=Sim1Agg, aes(x=Block, y=Correctness)) +
geom_jitter(aes(shape = Systematicity), height = 0, size = 0.8, colour = 'darkgray') +
geom_smooth(method = 'lm', aes(linetype = Systematicity), size = 1.2, colour = "black") +
labs(x="Block", y="Proportion Correct") +
#ggtitle("Results of Simulation I") +
facet_wrap( ~ Type, nrow = 2) +
theme(strip.text.y = element_text(angle = 0))+
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme_alan() +
theme(legend.title=element_blank())
So that’s the best representation of the data, but unfortunately we don’t have that kind of data for Padraic’s 2011 paper, so we need to plot it another way
Sim1Agg2 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type, data= Sim1Agg, f1))
colnames(Sim1Agg2) <- c("Systematicity", "Block", "Type", "Correctness", "SD", "SE")
Sim1Agg2$Source <- "Replication"
Sim1Agg2 <- subset(Sim1Agg2, select = c("Source", "Systematicity", "Type", "Block", "Correctness", "SE"))
#Bump Down Some values so they will display nicely on the graph
Sim1Agg2$Correctness <- mapvalues(Sim1Agg2$Correctness,
from = 1,
to = 0.97)
#Load in Monaghan Results
MonResults <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/MonResults.csv")
# Combine into a single dataframe
Comparison <- rbind(Sim1Agg2, MonResults)
#Plot the Results
ggplot(data=Comparison, aes(x=Block, y= Correctness)) +
geom_line(stat = "identity",
aes(col = Systematicity, linetype = Source),
size = 1.2) +
geom_errorbar(aes(ymin = Correctness - SE,
ymax = Correctness + SE,
width = 0.1)) +
labs(x="Block", y="Proportion Correct") +
facet_wrap( ~ Type, nrow = 2) +
theme(strip.text.y = element_text(angle = 0))+
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme_alan() +
theme(legend.title=element_blank()) +
scale_colour_manual(values = c("black", "#666666")) +
scale_linetype_manual(values = c("solid", "dotted"))
Now we need graphs for Within-category errors
Comparison$ID <- paste(Comparison$Source, Comparison$Systematicity, Comparison$Block, sep = '')
CompInd <- subset(Comparison, Type == "Individuation")
CompCat <- subset(Comparison, Type == "Categorisation")
CompInd$Individuation <- CompInd$Correctness
CompCat$Categorisation <- CompCat$Correctness
CompInd <- arrange(CompInd, ID)
CompCat <- arrange(CompCat, ID)
CatError <- CompCat
CatError$Individuation <- CompInd$Individuation
CatError$Within <- CatError$Categorisation - CatError$Individuation
CatError <- subset(CatError, select = c(Source, Systematicity, Type, Block, Within))
ggplot(data=CatError, aes(x=Block, y= Within)) +
geom_line(stat = "identity",
aes(col = Systematicity, linetype = Source),
size = 1.2) +
labs(x="Block", y="Within-Category Error") +
theme(strip.text.y = element_text(angle = 0))+
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme_alan() +
theme(legend.title=element_blank()) +
scale_colour_manual(values = c("black", "#666666")) +
scale_linetype_manual(values = c("solid", "dotted")) +
scale_y_continuous(limits = c(0, 1),
breaks = c(0.25, 0.5, 0.75,1))
Now we take a look at the full Simulation results
library(standardize)
Sim2.C.Std <- standardize(MonCatCorr ~ Systematicity * Author * Phonology + Block + (1|SimID), data= SimData, family= binomial)
Sim2.C.mod <- lmer(Sim2.C.Std$formula, Sim2.C.Std$data)
Sim2.afex.C <- mixed(Sim2.C.mod,
data = Sim2.C.Std$data,
family = binomial,
control = glmerControl(optimizer = "Nelder_Mead"),
method = 'LRT',
progress = FALSE,
check_contrasts = FALSE)
## Formula (the first argument) converted to formula.
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00175552 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.066735 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
summary(Sim2.afex.C) #Summary Table
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MonCatCorr ~ Systematicity * Author * Phonology + Block + (1 |
## SimID)
## Data: Sim2.C.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 8650.2 8726.6 -4315.1 8630.2 15350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1174 0.0000 0.1306 0.4204 1.0394
##
## Random effects:
## Groups Name Variance Std.Dev.
## SimID (Intercept) 0 0
## Number of obs: 15360, groups: SimID, 320
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.04698 1.53497 4.591 4.41e-06 ***
## SystematicityA -5.80758 1.53496 -3.784 0.000155 ***
## AuthorM -0.08005 1.53464 -0.052 0.958398
## PhonologyD -4.11069 1.53497 -2.678 0.007406 **
## Block 0.50232 0.02781 18.060 < 2e-16 ***
## SystematicityA:AuthorM -0.18244 1.53464 -0.119 0.905372
## SystematicityA:PhonologyD 4.68933 1.53496 3.055 0.002250 **
## AuthorM:PhonologyD -0.11072 1.53464 -0.072 0.942487
## SystematicityA:AuthorM:PhonologyD -0.21597 1.53464 -0.141 0.888084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SystmA AuthrM PhnlgD Block SyA:AM SyA:PD AtM:PD
## SystemtctyA -1.000
## AuthorM -0.422 0.422
## PhonologyD -0.999 0.999 0.422
## Block 0.004 0.000 0.000 0.003
## SystmtcA:AM 0.422 -0.422 -1.000 -0.422 0.000
## SystmtcA:PD 0.999 -0.999 -0.422 -1.000 -0.001 0.422
## AthrM:PhnlD 0.422 -0.422 -0.999 -0.422 0.000 0.999 0.422
## SystA:AM:PD -0.422 0.422 0.999 0.422 0.000 -0.999 -0.422 -1.000
## convergence code: 0
## Model failed to converge with max|grad| = 0.00175552 (tol = 0.001, component 1)
Sim2.afex.C$anova_table # Read Results from Here
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: MonCatCorr ~ Systematicity * Author * Phonology + Block + (1 |
## Model: SimID)
## Data: $
## Data: Sim2.C.Std
## Data: data
## Df full model: 10
## Df Chisq Chi Df Pr(>Chisq)
## Systematicity 9 833.708 1 < 2.2e-16 ***
## Author 9 0.000 1 0.9997
## Phonology 9 39.628 1 3.073e-10 ***
## Block 9 345.642 1 < 2.2e-16 ***
## Systematicity:Author 9 0.000 1 0.9997
## Systematicity:Phonology 9 196.613 1 < 2.2e-16 ***
## Author:Phonology 9 0.000 1 0.9996
## Systematicity:Author:Phonology 9 0.000 1 0.9996
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(MonCatCorr ~ Block, data= SimData, f1))
## Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 1 0.7958333 0.4031438 0.06374264
## 2 2 0.8583333 0.3487533 0.05514275
## 3 3 0.8919271 0.3105129 0.04909640
## 4 4 0.9208333 0.2700339 0.04269610
do.call(data.frame, aggregate(MonCatCorr ~ Systematicity, data= SimData, f1))
## Systematicity MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 A 0.7432292 0.43688033 0.06907685
## 2 S 0.9902344 0.09834387 0.01554953
do.call(data.frame, aggregate(MonCatCorr ~ Author, data= SimData, f1))
## Author MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 M 0.8527344 0.3543936 0.05603455
## 2 N 0.8807292 0.3241280 0.05124914
do.call(data.frame, aggregate(MonCatCorr ~ Phonology, data= SimData, f1))
## Phonology MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1 D 0.9080729 0.2889418 0.04568571
## 2 P 0.8253906 0.3796574 0.06002910
Unfortunately the above models fail to converge meaningfully - so below I’ll try a glm without the random effects
SimDataAgg.C <- do.call(data.frame, aggregate(MonCatCorr ~ Systematicity * Author * Phonology * Block * SimID,
data = SimData, f1))
aov_ez("SimID", "MonCatCorr.Mean", data = SimDataAgg.C,
between = c("Systematicity", "Author", "Phonology"),
within = "Block",
return = "nice")
## Contrasts set to contr.sum for the following variables: Systematicity, Author, Phonology
## Anova Table (Type 3 tests)
##
## Response: MonCatCorr.Mean
## Effect df MSE F ges
## 1 Systematicity 1, 312 0.01 3272.61 *** .80
## 2 Author 1, 312 0.01 42.04 *** .05
## 3 Phonology 1, 312 0.01 366.70 *** .31
## 4 Systematicity:Author 1, 312 0.01 54.59 *** .06
## 5 Systematicity:Phonology 1, 312 0.01 560.40 *** .41
## 6 Author:Phonology 1, 312 0.01 93.71 *** .10
## 7 Systematicity:Author:Phonology 1, 312 0.01 112.04 *** .12
## 8 Block 3, 936 0.00 287.91 *** .36
## 9 Systematicity:Block 3, 936 0.00 142.62 *** .22
## 10 Author:Block 3, 936 0.00 12.85 *** .02
## 11 Phonology:Block 3, 936 0.00 60.60 *** .11
## 12 Systematicity:Author:Block 3, 936 0.00 6.28 *** .01
## 13 Systematicity:Phonology:Block 3, 936 0.00 5.31 ** .01
## 14 Author:Phonology:Block 3, 936 0.00 8.67 *** .02
## 15 Systematicity:Author:Phonology:Block 3, 936 0.00 4.93 ** .010
## p.value
## 1 <.0001
## 2 <.0001
## 3 <.0001
## 4 <.0001
## 5 <.0001
## 6 <.0001
## 7 <.0001
## 8 <.0001
## 9 <.0001
## 10 <.0001
## 11 <.0001
## 12 .0003
## 13 .001
## 14 <.0001
## 15 .002
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
Let’s see if things converge any better for individuation
Sim2.I.Std <- standardize(IndCorr ~ Systematicity * Author * Phonology + Block + (1|SimID), data= SimData, family= binomial)
Sim2.I.mod <- lmer(Sim2.I.Std$formula, Sim2.I.Std$data)
Sim2.afex.I <- mixed(Sim2.I.mod,
data = Sim2.I.Std$data,
family = binomial,
control = glmerControl(optimizer = "Nelder_Mead"),
method = 'LRT',
progress = FALSE,
check_contrasts = FALSE)
## Formula (the first argument) converted to formula.
summary(Sim2.afex.I) #Summary Table
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## IndCorr ~ Systematicity * Author * Phonology + Block + (1 | SimID)
## Data: Sim2.I.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 17562.7 17639.1 -8771.3 17542.7 15350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4056 -0.7401 -0.3089 0.7258 3.6887
##
## Random effects:
## Groups Name Variance Std.Dev.
## SimID (Intercept) 0.08579 0.2929
## Number of obs: 15360, groups: SimID, 320
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.04830 0.02473 -1.95 0.0508 .
## SystematicityA -0.38512 0.02484 -15.51 < 2e-16 ***
## AuthorM -0.21543 0.02476 -8.70 < 2e-16 ***
## PhonologyD 0.66991 0.02508 26.71 < 2e-16 ***
## Block 0.78107 0.01972 39.61 < 2e-16 ***
## SystematicityA:AuthorM -0.12303 0.02474 -4.97 6.58e-07 ***
## SystematicityA:PhonologyD 0.06262 0.02473 2.53 0.0113 *
## AuthorM:PhonologyD -0.27555 0.02479 -11.12 < 2e-16 ***
## SystematicityA:AuthorM:PhonologyD -0.06127 0.02473 -2.48 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SystmA AuthrM PhnlgD Block SyA:AM SyA:PD AtM:PD
## SystemtctyA 0.018
## AuthorM -0.023 0.011
## PhonologyD 0.000 -0.053 -0.030
## Block -0.012 -0.091 -0.051 0.160
## SystmtcA:AM 0.007 -0.020 0.019 -0.003 -0.033
## SystmtcA:PD -0.038 0.000 0.002 0.019 0.013 -0.021
## AthrM:PhnlD -0.020 0.010 0.005 -0.035 -0.066 -0.036 0.005
## SystA:AM:PD 0.003 -0.019 -0.037 0.003 -0.017 0.002 -0.024 0.019
Sim2.afex.I # Read Results from Here
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: IndCorr ~ Systematicity * Author * Phonology + Block + (1 | SimID)
## Data: $
## Data: Sim2.I.Std
## Data: data
## Df full model: 10
## Effect df Chisq p.value
## 1 Systematicity 1 182.67 *** <.0001
## 2 Author 1 68.63 *** <.0001
## 3 Phonology 1 388.13 *** <.0001
## 4 Block 1 1785.40 *** <.0001
## 5 Systematicity:Author 1 23.86 *** <.0001
## 6 Systematicity:Phonology 1 6.35 * .01
## 7 Author:Phonology 1 105.94 *** <.0001
## 8 Systematicity:Author:Phonology 1 6.08 * .01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(IndCorr ~ Block, data= SimData, f1))
## Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 1 0.2549479 0.4358887 0.06892005
## 2 2 0.4463542 0.4971785 0.07861083
## 3 3 0.5851563 0.4927592 0.07791207
## 4 4 0.6723958 0.4694007 0.07421877
do.call(data.frame, aggregate(IndCorr ~ Systematicity, data= SimData, f1))
## Systematicity IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 A 0.4128906 0.4923855 0.07785299
## 2 S 0.5665365 0.4955854 0.07835893
do.call(data.frame, aggregate(IndCorr ~ Author, data= SimData, f1))
## Author IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 M 0.4470052 0.4972160 0.07861675
## 2 N 0.5324219 0.4989802 0.07889570
do.call(data.frame, aggregate(IndCorr ~ Phonology, data= SimData, f1))
## Phonology IndCorr.Mean IndCorr.SD IndCorr.SE
## 1 D 0.6250000 0.4841544 0.07655154
## 2 P 0.3544271 0.4783705 0.07563702
So that converged… good news
Now we just do it for Within-category error
Sim2.WIC.Std <- standardize(MonTrueCat ~ Systematicity * Author * Phonology + Block + (1|SimID),
data= SimData, family= binomial)
Sim2.WIC.mod <- lmer(Sim2.WIC.Std$formula, Sim2.WIC.Std$data)
Sim2.afex.WIC <- mixed(Sim2.WIC.mod,
data = Sim2.WIC.Std$data,
family = binomial,
control = glmerControl(optimizer = "Nelder_Mead"),
method = 'LRT',
progress = FALSE,
check_contrasts = FALSE)
## Formula (the first argument) converted to formula.
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.116122 (tol =
## 0.001, component 1)
summary(Sim2.afex.WIC) #Summary Table
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: MonTrueCat ~ Systematicity * Author * Phonology + Block + (1 |
## SimID)
## Data: Sim2.WIC.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 18490.4 18566.8 -9235.2 18470.4 15350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0213 -0.7318 -0.4779 0.9393 3.0839
##
## Random effects:
## Groups Name Variance Std.Dev.
## SimID (Intercept) 0.06292 0.2508
## Number of obs: 15360, groups: SimID, 320
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.57599 0.02297 -25.080 < 2e-16 ***
## SystematicityA -0.20945 0.02283 -9.173 < 2e-16 ***
## AuthorM 0.15649 0.02282 6.857 7.03e-12 ***
## PhonologyD -0.44726 0.02291 -19.523 < 2e-16 ***
## Block -0.52562 0.01838 -28.605 < 2e-16 ***
## SystematicityA:AuthorM 0.05399 0.02281 2.367 0.0179 *
## SystematicityA:PhonologyD 0.17046 0.02283 7.468 8.16e-14 ***
## AuthorM:PhonologyD 0.18132 0.02283 7.943 1.97e-15 ***
## SystematicityA:AuthorM:PhonologyD -0.03446 0.02281 -1.511 0.1309
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SystmA AuthrM PhnlgD Block SyA:AM SyA:PD AtM:PD
## SystemtctyA 0.016
## AuthorM -0.052 -0.006
## PhonologyD 0.072 0.006 -0.044
## Block 0.103 0.039 -0.024 0.079
## SystmtcA:AM -0.006 -0.049 0.012 -0.006 -0.010
## SystmtcA:PD -0.002 0.060 -0.004 0.008 -0.034 -0.041
## AthrM:PhnlD -0.045 -0.007 0.063 -0.052 -0.029 0.002 -0.004
## SystA:AM:PD -0.004 -0.041 0.002 -0.005 0.007 0.062 -0.049 0.011
Sim2.afex.WIC # Read Results from Here
## Warning: lme4 reported (at least) the following warnings for 'Block':
## * Model failed to converge with max|grad| = 0.116122 (tol = 0.001, component 1)
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: MonTrueCat ~ Systematicity * Author * Phonology + Block + (1 |
## Model: SimID)
## Data: $
## Data: Sim2.WIC.Std
## Data: data
## Df full model: 10
## Effect df Chisq p.value
## 1 Systematicity 1 75.35 *** <.0001
## 2 Author 1 44.20 *** <.0001
## 3 Phonology 1 258.27 *** <.0001
## 4 Block 1 868.96 *** <.0001
## 5 Systematicity:Author 1 5.56 * .02
## 6 Systematicity:Phonology 1 51.65 *** <.0001
## 7 Author:Phonology 1 58.22 *** <.0001
## 8 Systematicity:Author:Phonology 1 2.27 .13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(MonTrueCat ~ Block, data= SimData, f1))
## Block MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1 1 0.5408854 0.4983905 0.07880245
## 2 2 0.4119792 0.4922555 0.07783242
## 3 3 0.3067708 0.4612135 0.07292426
## 4 4 0.2484375 0.4321631 0.06833099
do.call(data.frame, aggregate(MonTrueCat ~ Systematicity, data= SimData, f1))
## Systematicity MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1 A 0.3303385 0.4703656 0.07437133
## 2 S 0.4236979 0.4941759 0.07813607
do.call(data.frame, aggregate(MonTrueCat ~ Author, data= SimData, f1))
## Author MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1 M 0.4057292 0.4910646 0.07764413
## 2 N 0.3483073 0.4764650 0.07533573
do.call(data.frame, aggregate(MonTrueCat ~ Phonology, data= SimData, f1))
## Phonology MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1 D 0.2830729 0.4505209 0.07123361
## 2 P 0.4709635 0.4991887 0.07892866
#Aggregate Data
Sim2Ind <- aggregate(IndCorr ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)
Sim2Cat <- aggregate(MonCatCorr ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)
Sim2WIC <- aggregate(MonTrueCat ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)
#Add a coding colum
Sim2Ind$Type <- "Individuation"
Sim2Cat$Type <- "Categorisation"
Sim2WIC$Type <- "Within-Category Error"
#Give columns the same names
colnames(Sim2Ind) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")
colnames(Sim2Cat) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")
colnames(Sim2WIC) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")
#Rowbind into a single data frame
Sim2Agg <- rbind(Sim2Ind, Sim2Cat)
#Rename some stuff
Sim2Agg$Systematicity <- revalue(Sim2Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Sim2Agg$Author <- revalue(Sim2Agg$Author, c("M" = "Monaghan", "N" = "Nielsen"))
Sim2Agg$Phonology <- revalue(Sim2Agg$Phonology, c("P" = "Clustered", "D" = "Dispersed"))
Sim2Agg2 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type * Author * Phonology, data= Sim2Agg, f1))
colnames(Sim2Agg2) <- c("Systematicity", "Block", "Type", "Author", "Phonology", "Correctness", "SD", "SE")
Sim2Agg2 <- subset(Sim2Agg2, select = c("Block", "Systematicity", "Author", "Phonology", "Type", "Correctness", "SE"))
ggplot(data=Sim2Agg2, aes(x=Block, y= Correctness)) +
geom_line(stat = "identity",
aes(col = Systematicity,
linetype = Phonology),
size = 1.2) +
geom_errorbar(aes(ymin = Correctness - SE,
ymax = Correctness + SE,
width = 0.1)) +
labs(x="Block", y="Proportion Correct") +
facet_grid(Type ~ Author) +
theme(strip.text.y = element_text(angle = 0))+
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme_alan() +
theme(legend.title=element_blank()) +
scale_colour_manual(values = c("black", "#666666")) +
scale_linetype_manual(values = c("dotted", "solid"))
#And then we need a graph of Within-category error
Sim2Agg3 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type * Author * Phonology, data = Sim2WIC, f1))
colnames(Sim2Agg3) <- c("Systematicity", "Block", "Type", "Author", "Phonology", "Correctness", "SD", "SE")
Sim2Agg3 <- subset(Sim2Agg3, select = c("Block", "Systematicity", "Author", "Phonology", "Type", "Correctness", "SE"))
Sim2Agg3$Systematicity <- revalue(Sim2Agg3$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Sim2Agg3$Author <- revalue(Sim2Agg3$Author, c("M" = "Monaghan", "N" = "Nielsen"))
Sim2Agg3$Phonology <- revalue(Sim2Agg3$Phonology, c("P" = "Clustered", "D" = "Dispersed"))
ggplot(data=Sim2Agg3, aes(x=Block, y= Correctness)) +
geom_line(stat = "identity",
aes(col = Systematicity,
linetype = Phonology),
size = 1.2) +
geom_errorbar(aes(ymin = Correctness - SE,
ymax = Correctness + SE,
width = 0.1)) +
labs(x="Block", y="Proportion of Within-Category Errors") +
facet_wrap( ~ Author, nrow = 1) +
theme(strip.text.y = element_text(angle = 0))+
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme_alan() +
theme(legend.title=element_blank()) +
scale_colour_manual(values = c("black", "#666666")) +
scale_linetype_manual(values = c("dotted", "solid"))
Now we can start looking at the experiment data
The first thing we want to do is take a look at the similarity of words to each other in all four of the experimental conditions
ICAgg <- aggregate(ICDist ~ ID * Phon * Sys, data = Exp1, mean)
colnames(ICAgg) <- c("ID", "Phonology", "Systematicity", "Distance" )
ICAgg$Type <- "Within-Class"
OCAgg <- aggregate(OCDist ~ ID * Phon * Sys, data = Exp1, mean)
colnames(OCAgg) <- c("ID", "Phonology", "Systematicity", "Distance" )
OCAgg$Type <- "Between-Class"
EucAgg <- rbind(OCAgg, ICAgg )
EucAgg$Systematicity <- revalue(EucAgg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
EucAgg$Phonology <- factor(EucAgg$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))
EucAgg$Type <- factor(EucAgg$Type, level = c("Within-Class", "Between-Class"))
EucAgg$Condition <- paste(EucAgg$Systematicity,EucAgg$Phonology)
EucAgg$Condition <- factor(EucAgg$Condition,
level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"),
labels = c("Clustered", " Clustered", "Dispersed", " Dispersed"))
ggplot(data=EucAgg, aes(x= Condition, y=Distance)) +
geom_bar(position= position_dodge(1),
aes(fill = Systematicity),
stat = "identity", width = .6) +
geom_jitter(height = 0, size = 1.2, colour = 'darkgray') +
labs(x="", y="Euclidean Distance") +
facet_wrap( ~ Type) +
theme_alan() +
theme(strip.text.y = element_text(hjust= 0)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = c("black", "#666666")) +
theme(legend.title=element_blank())
SO that’s Euclidean distances. Let’s start taking a look at the actual statistics
# Get counts of hits and false alarms for each participant
Exp1Hits <- subset(Exp1, RespType == "H")
Exp1FAs <- subset(Exp1, RespType == "FA")
HitAgg <- data.frame(xtabs(~ ID, data= Exp1Hits))
colnames(HitAgg) <- c("ID", "Hits")
FAAgg <- data.frame(xtabs(~ ID, data= Exp1FAs))
colnames(FAAgg) <- c("ID", "FAs")
#Append those into a single dataframe with condition data
Exp1Agg <- aggregate(Correctness ~ ID * Sys * Phon, data= Exp1, mean)
Exp1Agg <- Exp1Agg[order(Exp1Agg$ID),]
Exp1Agg <- cbind(Exp1Agg, HitAgg)
Exp1Agg <- cbind(Exp1Agg, FAAgg)
Exp1Agg <- subset(Exp1Agg, select = c(ID, Sys, Phon, Correctness, Hits, FAs))
#Calculate pHit and pFA
Exp1Agg$pHit <- Exp1Agg$Hits / 12
Exp1Agg$pFA <- Exp1Agg$FAs / 36
#Calculate D Prime
Exp1Agg$DPrime <- qnorm(Exp1Agg$pHit) - qnorm(Exp1Agg$pFA)
Exp1Agg$C <- 0.5 * ((qnorm(Exp1Agg$pHit) + qnorm(Exp1Agg$pFA)))
#This produces impossible values of INF becuase there are participants with 100% hits or 0% False Alarms- we need to rectify that
Exp1Agg$Hits <- mapvalues(Exp1Agg$Hits, from = c(12, 0), to = c(11.5, 0.5))
## The following `from` values were not present in `x`: 0
Exp1Agg$FAs <- mapvalues(Exp1Agg$FAs, from = c(36, 0), to = c(35.5, 0.5))
## The following `from` values were not present in `x`: 36
Exp1Agg$pHit <- Exp1Agg$Hits / 12
Exp1Agg$pFA <- Exp1Agg$FAs / 36
Exp1Agg$DPrime <- qnorm(Exp1Agg$pHit) - qnorm(Exp1Agg$pFA)
Exp1Agg$C <- 0.5 * ((qnorm(Exp1Agg$pHit) + qnorm(Exp1Agg$pFA)))
# Now we can plot the results
colnames(Exp1Agg) <- c("ID", "Systematicity", "Phonology", "Correctness", "Hits", "FAs", "pHit", "pFA", "DPrime", "C" )
Exp1Agg$Systematicity <- revalue(Exp1Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Exp1Agg$Phonology <- factor(Exp1Agg$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))
Exp1Agg$Condition <- paste(Exp1Agg$Systematicity,Exp1Agg$Phonology)
Exp1Agg$Condition <- factor(Exp1Agg$Condition,
level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"),
labels = c("Clustered", " Clustered", "Dispersed", " Dispersed"))
ggplot(data=Exp1Agg, aes(x= Condition, y=DPrime)) +
geom_bar(position= position_dodge(width = 1),
aes(fill = Systematicity),
stat = "summary", fun.y = "mean",
width = .6) +
geom_jitter(height = 0, size = 1.2, colour = 'darkgray') +
labs(x="", y="D Prime Performance") +
theme_alan() +
theme(strip.text.y = element_text(hjust= 0)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = c("black", "#666666")) +
theme(legend.title=element_blank())
#And some stats on this data:
#unfortunately afex does not work with aggregated data formats, and d prime is always an aggregate measure, so we need to do something else
# Unfortunately I can't figure out how to get D Prime to work with a random effect- becaues D prime is inherent aggregated I cant' seem to add a by-participant random effect.
Exp1D.model <- glm(DPrime ~ Phonology * Systematicity , data = Exp1Agg)
summary(Exp1D.model)
##
## Call:
## glm(formula = DPrime ~ Phonology * Systematicity, data = Exp1Agg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.60362 -0.38476 -0.09698 0.44399 2.11637
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.4258 0.2348 1.813
## PhonologyDispersed 0.1768 0.3321 0.532
## SystematicitySystematic 1.3899 0.3321 4.186
## PhonologyDispersed:SystematicitySystematic -0.9515 0.4696 -2.026
## Pr(>|t|)
## (Intercept) 0.078146 .
## PhonologyDispersed 0.597680
## SystematicitySystematic 0.000175 ***
## PhonologyDispersed:SystematicitySystematic 0.050226 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.5513871)
##
## Null deviance: 31.364 on 39 degrees of freedom
## Residual deviance: 19.850 on 36 degrees of freedom
## AIC: 95.488
##
## Number of Fisher Scoring iterations: 2
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(20))
do.call(data.frame, aggregate(DPrime ~ Systematicity, data= Exp1Agg, f1))
## Systematicity DPrime.Mean DPrime.SD DPrime.SE
## 1 Arbitrary 0.5141776 0.6750417 0.1509439
## 2 Systematic 1.4283827 0.8690203 0.1943188
do.call(data.frame, aggregate(DPrime ~ Phonology, data= Exp1Agg, f1))
## Phonology DPrime.Mean DPrime.SD DPrime.SE
## 1 Clustered 1.1207379 1.1079829 0.2477525
## 2 Dispersed 0.8218224 0.6132734 0.1371321
So nowe we can start doing the real meaty stats for the Experiment - an analysis that includes performance on the different trial types
Exp1.Std <- standardize(Correctness ~ Sys * Phon * TrialType + (1 + TrialType|ID),
data= Exp1, family= binomial)
Exp1.Mod <- lmer(Exp1.Std$formula, Exp1.Std$data)
Exp1.afex <- mixed(Exp1.Mod,
data = Exp1.Std$data,
family = binomial,
control = glmerControl(optimizer = "Nelder_Mead"),
method = 'LRT',
progress = FALSE)
## Formula (the first argument) converted to formula.
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00252673 (tol =
## 0.001, component 1)
summary(Exp1.afex) #Summary Table
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correctness ~ Sys * Phon * TrialType + (1 + TrialType | ID)
## Data: Exp1.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 2253.0 2353.1 -1108.5 2217.0 1902
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4681 -0.9285 0.4001 0.7383 1.8966
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.1749 0.4182
## TrialTypeIC 0.4664 0.6829 -0.04
## TrialTypeOC 0.1673 0.4090 0.36 -0.70
## Number of obs: 1920, groups: ID, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.71216 0.08850 8.047 8.48e-16 ***
## SysA -0.31852 0.08817 -3.613 0.000303 ***
## PhonD -0.12327 0.08735 -1.411 0.158199
## TrialTypeIC -0.65352 0.13562 -4.819 1.45e-06 ***
## TrialTypeOC 0.52334 0.10377 5.043 4.58e-07 ***
## SysA:PhonD 0.17400 0.08735 1.992 0.046369 *
## SysA:TrialTypeIC 0.60921 0.13550 4.496 6.92e-06 ***
## SysA:TrialTypeOC -0.51127 0.10293 -4.967 6.79e-07 ***
## PhonD:TrialTypeIC 0.14023 0.13491 1.039 0.298608
## PhonD:TrialTypeOC -0.12244 0.10107 -1.211 0.225731
## SysA:PhonD:TrialTypeIC -0.28260 0.13493 -2.094 0.036224 *
## SysA:PhonD:TrialTypeOC 0.20921 0.10114 2.069 0.038588 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SysA PhonD TrlTIC TrlTOC SyA:PD SA:TTI SA:TTO PD:TTI
## SysA -0.108
## PhonD -0.053 0.054
## TrialTypeIC -0.043 0.059 0.029
## TrialTypeOC 0.136 -0.080 -0.044 -0.560
## SysA:PhonD 0.056 -0.052 -0.089 -0.033 0.048
## SysA:TrlTIC 0.059 -0.036 -0.032 -0.054 0.061 0.029
## SysA:TrlTOC -0.079 0.135 0.047 0.061 -0.220 -0.044 -0.565
## PhnD:TrlTIC 0.030 -0.033 -0.028 -0.025 0.032 0.049 0.024 -0.032
## PhnD:TrlTOC -0.046 0.049 0.124 0.033 -0.115 -0.066 -0.033 0.120 -0.568
## SyA:PD:TTIC -0.034 0.030 0.049 0.024 -0.033 -0.028 -0.024 0.032 -0.046
## SyA:PD:TTOC 0.050 -0.046 -0.066 -0.034 0.122 0.125 0.033 -0.115 0.053
## PD:TTO SA:PD:TTI
## SysA
## PhonD
## TrialTypeIC
## TrialTypeOC
## SysA:PhonD
## SysA:TrlTIC
## SysA:TrlTOC
## PhnD:TrlTIC
## PhnD:TrlTOC
## SyA:PD:TTIC 0.053
## SyA:PD:TTOC -0.187 -0.568
Exp1.afex # Read Results from Here
## Warning: lme4 reported (at least) the following warnings for 'Sys':
## * Model failed to converge with max|grad| = 0.00252673 (tol = 0.001, component 1)
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: Correctness ~ Sys * Phon * TrialType + (1 + TrialType | ID)
## Data: $
## Data: Exp1.Std
## Data: data
## Df full model: 18
## Effect df Chisq p.value
## 1 Sys 1 11.56 *** .0007
## 2 Phon 1 1.95 .16
## 3 TrialType 2 23.83 *** <.0001
## 4 Sys:Phon 1 3.79 + .05
## 5 Sys:TrialType 2 22.62 *** <.0001
## 6 Phon:TrialType 2 1.58 .45
## 7 Sys:Phon:TrialType 2 4.94 + .08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(Correctness ~ TrialType, data= Exp1, f1))
## TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1 IC 0.5166667 0.5002435 0.07909544
## 2 OC 0.7218750 0.4483089 0.07088386
## 3 T 0.6770833 0.4680790 0.07400979
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(20))
do.call(data.frame, aggregate(Correctness ~ Sys, data= Exp1, f1))
## Sys Correctness.Mean Correctness.SD Correctness.SE
## 1 A 0.5916667 0.4917816 0.10996571
## 2 S 0.7270833 0.4456906 0.09965944
do.call(data.frame, aggregate(Correctness ~ Phon, data= Exp1, f1))
## Phon Correctness.Mean Correctness.SD Correctness.SE
## 1 D 0.6447917 0.4788259 0.1070687
## 2 P 0.6739583 0.4690071 0.1048732
#we can use lsmeans for our post-hoc tests
# Post hoc test of trial type
(Exp1.LS <- lsmeans(Exp1.afex, "TrialType"))
## NOTE: Results may be misleading due to involvement in interactions
## TrialType lsmean SE df asymp.LCL asymp.UCL
## IC 0.05864515 0.1587193 NA -0.2524390 0.3697293
## OC 1.23549550 0.1452580 NA 0.9507950 1.5201959
## T 0.84234063 0.1406081 NA 0.5667539 1.1179274
##
## Results are averaged over the levels of: Sys, Phon
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
pairs(Exp1.LS)
## contrast estimate SE df z.ratio p.value
## IC - OC -1.1768503 0.2119451 NA -5.553 <.0001
## IC - T -0.7836955 0.2298352 NA -3.410 0.0019
## OC - T 0.3931549 0.1730689 NA 2.272 0.0598
##
## Results are averaged over the levels of: Sys, Phon
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
contrast(pairs(Exp1.LS), by = NULL, adjust = "holm")
## contrast estimate SE df z.ratio p.value
## IC - OC effect -0.6543867 0.15232667 NA -4.296 <.0001
## IC - T effect -0.2612318 0.07661174 NA -3.410 0.0007
## OC - T effect 0.9156185 0.16790683 NA 5.453 <.0001
##
## Results are averaged over the levels of: Sys, Phon
## P value adjustment: holm method for 3 tests
#Post hoc test of Systematicity x Trial Type Interaction
(Exp1.LS.2 <- lsmeans(Exp1.afex, c("TrialType", "Sys")))
## NOTE: Results may be misleading due to involvement in interactions
## TrialType Sys lsmean SE df asymp.LCL asymp.UCL
## IC A 0.3493356 0.2227875 NA -0.08731996 0.7859911
## OC A 0.4057044 0.1805994 NA 0.05173604 0.7596727
## T A 0.4258796 0.1851401 NA 0.06301162 0.7887475
## IC S -0.2320453 0.2265179 NA -0.67601216 0.2119216
## OC S 2.0652866 0.2262789 NA 1.62178807 2.5087852
## T S 1.2588017 0.2097993 NA 0.84760271 1.6700007
##
## Results are averaged over the levels of: Phon
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
pairs(Exp1.LS.2)
## contrast estimate SE df z.ratio p.value
## IC,A - OC,A -0.05636879 0.2819699 NA -0.200 1.0000
## IC,A - T,A -0.07654398 0.3155035 NA -0.243 0.9999
## IC,A - IC,S 0.58138085 0.3179968 NA 1.828 0.4474
## IC,A - OC,S -1.71595106 0.3175645 NA -5.403 <.0001
## IC,A - T,S -0.90946614 0.3060855 NA -2.971 0.0352
## OC,A - T,A -0.02017519 0.2126513 NA -0.095 1.0000
## OC,A - IC,S 0.63774964 0.2896901 NA 2.201 0.2369
## OC,A - OC,S -1.65958226 0.2885083 NA -5.752 <.0001
## OC,A - T,S -0.85309735 0.2768714 NA -3.081 0.0252
## T,A - IC,S 0.65792483 0.2925153 NA 2.249 0.2154
## T,A - OC,S -1.63940708 0.2925126 NA -5.605 <.0001
## T,A - T,S -0.83292216 0.2783930 NA -2.992 0.0331
## IC,S - OC,S -2.29733191 0.3158595 NA -7.273 <.0001
## IC,S - T,S -1.49084699 0.3333550 NA -4.472 0.0001
## OC,S - T,S 0.80648492 0.2703870 NA 2.983 0.0340
##
## Results are averaged over the levels of: Phon
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 6 estimates
contrast(pairs(Exp1.LS.2), by = NULL, adjust = "holm")
## contrast estimate SE df z.ratio p.value
## IC,A - OC,A effect 0.5348414 0.2785234 NA 1.920 0.2741
## IC,A - T,A effect 0.5146662 0.2928471 NA 1.757 0.3154
## IC,A - IC,S effect 1.1725910 0.2858534 NA 4.102 0.0004
## IC,A - OC,S effect -1.1247409 0.2359900 NA -4.766 <.0001
## IC,A - T,S effect -0.3182560 0.2092398 NA -1.521 0.3848
## OC,A - T,A effect 0.5710350 0.2208227 NA 2.586 0.0583
## OC,A - IC,S effect 1.2289598 0.2860693 NA 4.296 0.0002
## OC,A - OC,S effect -1.0683721 0.2353853 NA -4.539 0.0001
## OC,A - T,S effect -0.2618872 0.2099425 NA -1.247 0.4245
## T,A - IC,S effect 1.2491350 0.3087585 NA 4.046 0.0005
## T,A - OC,S effect -1.0481969 0.2637880 NA -3.974 0.0006
## T,A - T,S effect -0.2417120 0.2382689 NA -1.014 0.4245
## IC,S - OC,S effect -1.7061217 0.2990226 NA -5.706 <.0001
## IC,S - T,S effect -0.8996368 0.3098738 NA -2.903 0.0259
## OC,S - T,S effect 1.3976951 0.2893984 NA 4.830 <.0001
##
## Results are averaged over the levels of: Phon
## P value adjustment: holm method for 15 tests
do.call(data.frame, aggregate(Correctness ~ TrialType, data= subset(Exp1, Sys == "A"), f1))
## TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1 IC 0.5791667 0.4947246 0.1106238
## 2 OC 0.5937500 0.4916447 0.1099351
## 3 T 0.6000000 0.4909218 0.1097734
do.call(data.frame, aggregate(Correctness ~ TrialType, data= subset(Exp1, Sys == "S"), f1))
## TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1 IC 0.4541667 0.4989354 0.11156535
## 2 OC 0.8500000 0.3574440 0.07992690
## 3 T 0.7541667 0.4314801 0.09648187
So those are the stats as they stand - I think they all make pretty good sense, assuming I’ve done and interpreted them correctly! Now lets visualise them
f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(10))
Exp1Agg.2 <- do.call(data.frame, aggregate(Correctness ~ TrialType * Sys * Phon, data= Exp1, f1))
colnames(Exp1Agg.2) <- c("TrialType", "Systematicity", "Phonology", "Correctness", "SD", "SE" )
Exp1Agg.2$Systematicity <- revalue(Exp1Agg.2$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Exp1Agg.2$Phonology <- factor(Exp1Agg.2$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))
Exp1Agg.2$TrialType <- factor(Exp1Agg.2$TrialType, level = c("T", "IC", "OC"), labels = c("Target", "In-Class", "Out-of-Class"))
Exp1Agg.2$Condition <- paste(Exp1Agg.2$Systematicity,Exp1Agg.2$Phonology)
Exp1Agg.2$Condition <- factor(Exp1Agg.2$Condition,
level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"),
labels = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"))
ggplot(data=Exp1Agg.2, aes(x= TrialType, y=Correctness, fill = Systematicity)) +
geom_bar(position= position_dodge(.7),
stat = "identity", fun.y = "mean",
width = .5,
aes(colour = Phonology, linetype = Phonology),
size = 1.5) +
geom_errorbar(position = position_dodge(.7),
aes(ymin = Correctness - SE,
ymax = Correctness + SE,
width = 0.2,
linetype = Phonology),
size = 1.2,
colour = 'black') +
labs(x="", y="Proportion Correct") +
theme_alan() +
theme(strip.text.y = element_text(hjust= 0)) +
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
scale_fill_manual(values = c("black", "#666666")) +
theme(legend.title=element_blank()) +
scale_color_manual(values = c("gray70", "ghostwhite"))
## Warning: Ignoring unknown parameters: fun.y
ConfxEuc <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/ConfxEuc.csv")
ConfxEuc <- subset(ConfxEuc, select = c(Phon1, Phon2, Source, Type, Euc, Confusion))
colnames(ConfxEuc) <- c("Phoneme1","Phoneme2", "Source", "Type", "Euclidean", "Confusability")
ConfxEuc$Pair <- paste(ConfxEuc$Phoneme1, ConfxEuc$Phoneme2, sep = ',')
(ConfxEuc.corr <- cor.test(ConfxEuc$Euclidean, ConfxEuc$Confusability))
##
## Pearson's product-moment correlation
##
## data: ConfxEuc$Euclidean and ConfxEuc$Confusability
## t = -0.68922, df = 22, p-value = 0.4979
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5183772 0.2740896
## sample estimates:
## cor
## -0.1453815
ggplot(data=ConfxEuc, aes(x= Euclidean, y=Confusability)) +
geom_point(aes(colour= Source, shape = Type), size = 2) +
geom_text(aes(label = Pair), hjust = 1.5, size = 3) +
annotate(geom = "text", x= 3.5, y= 5, label = "r= -0.14, p = 0.498") +
geom_smooth(method = 'lm', col = 'black', se= FALSE)+
labs(x="Euclidean Distance", y="Confusability") +
theme_alan() +
theme(strip.text.y = element_text(hjust= 0)) +
theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
theme(legend.title=element_blank())