SimData <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Exp4Sim.csv")

Exp1 <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/Exp4Clean.csv")

We’ll do the analysis in the same order as in the paper.

So we start with Simulation 1

SimData$Author <- factor(SimData$Author)
SimData$Systematicity <- factor(SimData$Systematicity)
SimData$Phonology <- factor(SimData$Phonology)
SimData$BlockMinus <- SimData$Block - 1
SimData$BlockMinus <- as.numeric(SimData$BlockMinus)
SimData$MonCatCorr <- as.numeric(SimData$MonCatCorr)
SimData$IndCorr <- as.numeric(SimData$IndCorr)


SimData <- droplevels.data.frame(SimData)


str(SimData)

## 'data.frame':    15360 obs. of  24 variables:
##  $ Author       : Factor w/ 2 levels "M","N": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Systematicity: Factor w/ 2 levels "A","S": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Phonology    : Factor w/ 2 levels "D","P": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Condition    : Factor w/ 8 levels "MAD","MAP","MSD",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Within       : num  6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 ...
##  $ Between      : num  6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 6.26 ...
##  $ PhonWithin   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ PhonBetween  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Simnum       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ SimID        : Factor w/ 320 levels "MAD1","MAD10",..: 201 201 201 201 201 201 201 201 201 201 ...
##  $ Block        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LexItemNum   : int  2 1 7 11 8 6 9 12 3 10 ...
##  $ OutputCat    : num  0.25 0.25 0.75 0.75 0.75 0.25 0.75 0.75 0.25 0.75 ...
##  $ DiffOutputcat: num  0.197 0.197 0.197 0.235 0.197 ...
##  $ IndError     : int  1 1 0 0 1 1 1 1 1 1 ...
##  $ MonCatError  : int  1 1 0 0 0 1 0 0 1 0 ...
##  $ DCatError    : int  1 1 0 0 0 1 0 0 1 0 ...
##  $ CatDiff      : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
##  $ IndCorr      : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ MonCatCorr   : num  0 0 1 1 1 0 1 1 0 1 ...
##  $ DCatCorr     : int  0 0 1 1 1 0 1 1 0 1 ...
##  $ MonTrueCat   : int  0 0 0 0 1 0 1 1 0 1 ...
##  $ DTrueCat     : int  0 0 0 0 1 0 1 1 0 1 ...
##  $ BlockMinus   : num  0 0 0 0 0 0 0 0 0 0 ...

#Simulation 1 uses only Monaghan and only compares Systematic to Arbitrary languages (no Phonological Dispersion, no Nielsen Phonemes)
Sim1Data <- subset(SimData, Author == "M")
Sim1Data <- subset(Sim1Data, Phonology == "P")

Sim1.afex.C <- mixed(MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus|SimID),
                   data = Sim1Data, 
                   family = binomial,
                   control = glmerControl(optimizer = "bobyqa"),
                   method = 'LRT',
                   progress = FALSE)

## Contrasts set to contr.sum for the following variables: Systematicity, SimID

## Numerical variables NOT centered on 0 (i.e., interpretation of all main effects might be difficult if in interactions): BlockMinus

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 2 negative
## eigenvalues

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

summary(Sim1.afex.C)  #Summary Table

## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX

## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
##    Data: Sim1Data
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   2437.1   2480.9  -1211.6   2423.1     3833 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.67435  0.00003  0.00003  0.59725  0.83635 
## 
## Random effects:
##  Groups Name        Variance  Std.Dev.  Corr
##  SimID  (Intercept) 0.000e+00 0.000e+00     
##         BlockMinus  1.337e-14 1.156e-07  NaN
## Number of obs: 3840, groups:  SimID, 80
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)
## (Intercept)                10.4617   558.1661   0.019    0.985
## Systematicity1            -10.1043   558.1661  -0.018    0.986
## BlockMinus                  0.1122   298.3519   0.000    1.000
## Systematicity1:BlockMinus   0.1122   298.3519   0.000    1.000
## 
## Correlation of Fixed Effects:
##             (Intr) Systm1 BlckMn
## Systemtcty1 -1.000              
## BlockMinus  -0.802  0.802       
## Systmtc1:BM  0.802 -0.802 -1.000
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 2 negative eigenvalues

Sim1.afex.C$anova_table  # Read Results from Here

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: MonCatCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Df full model: 7
##                          Df  Chisq Chi Df Pr(>Chisq)    
## Systematicity             6 197.46      1     <2e-16 ***
## BlockMinus                6   0.00      1     0.9999    
## Systematicity:BlockMinus  6   0.00      1     0.9999    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(MonCatCorr ~ Systematicity, data= Sim1Data, f1))

##   Systematicity MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1             A       0.6645833     0.4722589    0.07467069
## 2             S       1.0000000     0.0000000    0.00000000

do.call(data.frame, aggregate(MonCatCorr ~ Block, data= Sim1Data, f1))

##   Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1     1       0.8031250     0.3978443    0.06290471
## 2     2       0.8093750     0.3929987    0.06213855
## 3     3       0.8416667     0.3652435    0.05775008
## 4     4       0.8750000     0.3308913    0.05231851

do.call(data.frame, aggregate(MonCatCorr ~ Systematicity * Block, data= Sim1Data, f1))

##   Systematicity Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1             A     1       0.6062500     0.4890903    0.07733196
## 2             S     1       1.0000000     0.0000000    0.00000000
## 3             A     2       0.6187500     0.4862005    0.07687505
## 4             S     2       1.0000000     0.0000000    0.00000000
## 5             A     3       0.6833333     0.4656615    0.07362755
## 6             S     3       1.0000000     0.0000000    0.00000000
## 7             A     4       0.7500000     0.4334645    0.06853675
## 8             S     4       1.0000000     0.0000000    0.00000000

Sim1.afex.I <- mixed(IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus|SimID),
                   data = Sim1Data, 
                   family = binomial,
                   control = glmerControl(optimizer = "bobyqa"),
                   method = 'LRT',
                   progress = FALSE)

## Contrasts set to contr.sum for the following variables: Systematicity, SimID

## Numerical variables NOT centered on 0 (i.e., interpretation of all main effects might be difficult if in interactions): BlockMinus

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

## Warning: contrasts dropped from factor SimID due to missing levels

## Warning: contrasts dropped from factor SimID due to missing levels

summary(Sim1.afex.I)  #Summary Table

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
##    Data: Sim1Data
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   4657.2   4701.0  -2321.6   4643.2     3833 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5226 -0.7126 -0.4947  0.9378  2.7694 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  SimID  (Intercept) 4.19e-02 0.204699     
##         BlockMinus  5.09e-05 0.007135 1.00
## Number of obs: 3840, groups:  SimID, 80
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -1.33307    0.06982 -19.094  < 2e-16 ***
## Systematicity1            -0.45735    0.06943  -6.587 4.48e-11 ***
## BlockMinus                 0.48044    0.03306  14.532  < 2e-16 ***
## Systematicity1:BlockMinus -0.01152    0.03297  -0.349    0.727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Systm1 BlckMn
## Systemtcty1  0.190              
## BlockMinus  -0.783 -0.151       
## Systmtc1:BM -0.150 -0.781  0.122

Sim1.afex.I$anova_table  # Read Results from Here

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: IndCorr ~ Systematicity * BlockMinus + (1 + BlockMinus | SimID)
## Data: Sim1Data
## Df full model: 7
##                          Df    Chisq Chi Df Pr(>Chisq)    
## Systematicity             6  39.8939      1  2.681e-10 ***
## BlockMinus                6 154.2932      1  < 2.2e-16 ***
## Systematicity:BlockMinus  6   0.1219      1      0.727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(IndCorr ~ Systematicity, data= Sim1Data, f1))

##   Systematicity IndCorr.Mean IndCorr.SD IndCorr.SE
## 1             A    0.2661458  0.4420566 0.06989528
## 2             S    0.4682292  0.4991196 0.07891774

do.call(data.frame, aggregate(IndCorr ~ Block, data= Sim1Data, f1))

##   Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1     1    0.2104167  0.4078170 0.06448154
## 2     2    0.3197917  0.4666388 0.07378207
## 3     3    0.4229167  0.4942799 0.07815252
## 4     4    0.5156250  0.5000163 0.07905952

do.call(data.frame, aggregate(IndCorr ~ Systematicity * Block, data= Sim1Data, f1))

##   Systematicity Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1             A     1    0.1437500  0.3512021 0.05552992
## 2             S     1    0.2770833  0.4480249 0.07083896
## 3             A     2    0.2166667  0.4124034 0.06520670
## 4             S     2    0.4229167  0.4945378 0.07819330
## 5             A     3    0.2958333  0.4568926 0.07224107
## 6             S     3    0.5500000  0.4980128 0.07874273
## 7             A     4    0.4083333  0.4920382 0.07779807
## 8             S     4    0.6229167  0.4851617 0.07671080

Now with those stats in hand we can output some graphs

#Aggregate Data
Sim1Ind <- aggregate(IndCorr ~ SimID * Block * Systematicity, data = Sim1Data, mean)
Sim1Cat <- aggregate(MonCatCorr ~ SimID * Block * Systematicity, data = Sim1Data, mean)

#Add a coding colum
Sim1Ind$Type <- "Individuation"
Sim1Cat$Type <- "Categorisation"

#Give columns the same names
colnames(Sim1Ind) <- c("SimID", "Block", "Systematicity", "Correctness", "Type")
colnames(Sim1Cat) <- c("SimID", "Block", "Systematicity", "Correctness", "Type")

#Rowbind into a single data frame
Sim1Agg <- rbind(Sim1Ind, Sim1Cat)

#Rename some stuff
Sim1Agg$Systematicity <- revalue(Sim1Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))





ggplot(data=Sim1Agg, aes(x=Block, y=Correctness)) +
  geom_jitter(aes(shape = Systematicity), height = 0, size = 0.8, colour = 'darkgray') +
  geom_smooth(method = 'lm', aes(linetype = Systematicity), size = 1.2, colour = "black") +
  labs(x="Block", y="Proportion Correct") +
  #ggtitle("Results of Simulation I") +
  facet_wrap( ~ Type, nrow = 2) +
  theme(strip.text.y = element_text(angle = 0))+
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme_alan() +
  theme(legend.title=element_blank())

So that’s the best representation of the data, but unfortunately we don’t have that kind of data for Padraic’s 2011 paper, so we need to plot it another way

Sim1Agg2 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type, data= Sim1Agg, f1))

colnames(Sim1Agg2) <- c("Systematicity", "Block", "Type", "Correctness", "SD", "SE")
Sim1Agg2$Source <- "Replication"

Sim1Agg2 <- subset(Sim1Agg2, select = c("Source", "Systematicity", "Type", "Block", "Correctness", "SE"))

#Bump Down Some values so they will display nicely on the graph
Sim1Agg2$Correctness <- mapvalues(Sim1Agg2$Correctness,
                                  from = 1,
                                  to = 0.97)

#Load in Monaghan Results
MonResults <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/MonResults.csv")

# Combine into a single dataframe
Comparison <- rbind(Sim1Agg2, MonResults)


#Plot the Results

ggplot(data=Comparison, aes(x=Block, y= Correctness)) +
  geom_line(stat = "identity", 
            aes(col = Systematicity, linetype = Source), 
            size = 1.2) +
  geom_errorbar(aes(ymin = Correctness - SE,
                    ymax = Correctness + SE,
                    width = 0.1)) +
  labs(x="Block", y="Proportion Correct") +
  facet_wrap( ~ Type, nrow = 2) +
  theme(strip.text.y = element_text(angle = 0))+
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme_alan() +
  theme(legend.title=element_blank()) +
  scale_colour_manual(values = c("black", "#666666")) +
  scale_linetype_manual(values = c("solid", "dotted"))

Now we need graphs for Within-category errors

Comparison$ID <- paste(Comparison$Source, Comparison$Systematicity, Comparison$Block, sep = '')
CompInd <- subset(Comparison, Type == "Individuation")
CompCat <- subset(Comparison, Type == "Categorisation")

CompInd$Individuation <- CompInd$Correctness
CompCat$Categorisation <- CompCat$Correctness

CompInd <- arrange(CompInd, ID)
CompCat <- arrange(CompCat, ID)

CatError <- CompCat
CatError$Individuation <- CompInd$Individuation

CatError$Within <- CatError$Categorisation - CatError$Individuation

CatError <- subset(CatError, select = c(Source, Systematicity, Type, Block, Within))

ggplot(data=CatError, aes(x=Block, y= Within)) +
  geom_line(stat = "identity", 
            aes(col = Systematicity, linetype = Source), 
            size = 1.2) +
  labs(x="Block", y="Within-Category Error") +
  theme(strip.text.y = element_text(angle = 0))+
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme_alan() +
  theme(legend.title=element_blank()) +
  scale_colour_manual(values = c("black", "#666666")) +
  scale_linetype_manual(values = c("solid", "dotted")) +
  scale_y_continuous(limits = c(0, 1),
                     breaks = c(0.25, 0.5, 0.75,1))

Now we take a look at the full Simulation results

library(standardize)
Sim2.C.Std <- standardize(MonCatCorr ~ Systematicity * Author * Phonology + Block + (1|SimID), data= SimData, family= binomial)
Sim2.C.mod <- lmer(Sim2.C.Std$formula, Sim2.C.Std$data)

Sim2.afex.C <- mixed(Sim2.C.mod,
                   data = Sim2.C.Std$data, 
                   family = binomial,
                   control = glmerControl(optimizer = "Nelder_Mead"),
                   method = 'LRT',
                   progress = FALSE,
                   check_contrasts = FALSE)

## Formula (the first argument) converted to formula.

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00175552 (tol =
## 0.001, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.066735 (tol =
## 0.001, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

summary(Sim2.afex.C)  #Summary Table

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: MonCatCorr ~ Systematicity * Author * Phonology + Block + (1 |  
##     SimID)
##    Data: Sim2.C.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##   8650.2   8726.6  -4315.1   8630.2    15350 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1174  0.0000  0.1306  0.4204  1.0394 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  SimID  (Intercept) 0        0       
## Number of obs: 15360, groups:  SimID, 320
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        7.04698    1.53497   4.591 4.41e-06 ***
## SystematicityA                    -5.80758    1.53496  -3.784 0.000155 ***
## AuthorM                           -0.08005    1.53464  -0.052 0.958398    
## PhonologyD                        -4.11069    1.53497  -2.678 0.007406 ** 
## Block                              0.50232    0.02781  18.060  < 2e-16 ***
## SystematicityA:AuthorM            -0.18244    1.53464  -0.119 0.905372    
## SystematicityA:PhonologyD          4.68933    1.53496   3.055 0.002250 ** 
## AuthorM:PhonologyD                -0.11072    1.53464  -0.072 0.942487    
## SystematicityA:AuthorM:PhonologyD -0.21597    1.53464  -0.141 0.888084    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SystmA AuthrM PhnlgD Block  SyA:AM SyA:PD AtM:PD
## SystemtctyA -1.000                                                 
## AuthorM     -0.422  0.422                                          
## PhonologyD  -0.999  0.999  0.422                                   
## Block        0.004  0.000  0.000  0.003                            
## SystmtcA:AM  0.422 -0.422 -1.000 -0.422  0.000                     
## SystmtcA:PD  0.999 -0.999 -0.422 -1.000 -0.001  0.422              
## AthrM:PhnlD  0.422 -0.422 -0.999 -0.422  0.000  0.999  0.422       
## SystA:AM:PD -0.422  0.422  0.999  0.422  0.000 -0.999 -0.422 -1.000
## convergence code: 0
## Model failed to converge with max|grad| = 0.00175552 (tol = 0.001, component 1)

Sim2.afex.C$anova_table  # Read Results from Here

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: MonCatCorr ~ Systematicity * Author * Phonology + Block + (1 | 
## Model:     SimID)
## Data: $
## Data: Sim2.C.Std
## Data: data
## Df full model: 10
##                                Df   Chisq Chi Df Pr(>Chisq)    
## Systematicity                   9 833.708      1  < 2.2e-16 ***
## Author                          9   0.000      1     0.9997    
## Phonology                       9  39.628      1  3.073e-10 ***
## Block                           9 345.642      1  < 2.2e-16 ***
## Systematicity:Author            9   0.000      1     0.9997    
## Systematicity:Phonology         9 196.613      1  < 2.2e-16 ***
## Author:Phonology                9   0.000      1     0.9996    
## Systematicity:Author:Phonology  9   0.000      1     0.9996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))

do.call(data.frame, aggregate(MonCatCorr ~ Block, data= SimData, f1))

##   Block MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1     1       0.7958333     0.4031438    0.06374264
## 2     2       0.8583333     0.3487533    0.05514275
## 3     3       0.8919271     0.3105129    0.04909640
## 4     4       0.9208333     0.2700339    0.04269610

do.call(data.frame, aggregate(MonCatCorr ~ Systematicity, data= SimData, f1))

##   Systematicity MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1             A       0.7432292    0.43688033    0.06907685
## 2             S       0.9902344    0.09834387    0.01554953

do.call(data.frame, aggregate(MonCatCorr ~ Author, data= SimData, f1))

##   Author MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1      M       0.8527344     0.3543936    0.05603455
## 2      N       0.8807292     0.3241280    0.05124914

do.call(data.frame, aggregate(MonCatCorr ~ Phonology, data= SimData, f1))

##   Phonology MonCatCorr.Mean MonCatCorr.SD MonCatCorr.SE
## 1         D       0.9080729     0.2889418    0.04568571
## 2         P       0.8253906     0.3796574    0.06002910

Unfortunately the above models fail to converge meaningfully - so below I’ll try a glm without the random effects

SimDataAgg.C <- do.call(data.frame, aggregate(MonCatCorr ~ Systematicity * Author * Phonology * Block * SimID,
                                              data = SimData, f1))

aov_ez("SimID", "MonCatCorr.Mean", data = SimDataAgg.C, 
        between = c("Systematicity", "Author", "Phonology"),
        within = "Block",
       return = "nice")

## Contrasts set to contr.sum for the following variables: Systematicity, Author, Phonology

## Anova Table (Type 3 tests)
## 
## Response: MonCatCorr.Mean
##                                  Effect     df  MSE           F  ges
## 1                         Systematicity 1, 312 0.01 3272.61 ***  .80
## 2                                Author 1, 312 0.01   42.04 ***  .05
## 3                             Phonology 1, 312 0.01  366.70 ***  .31
## 4                  Systematicity:Author 1, 312 0.01   54.59 ***  .06
## 5               Systematicity:Phonology 1, 312 0.01  560.40 ***  .41
## 6                      Author:Phonology 1, 312 0.01   93.71 ***  .10
## 7        Systematicity:Author:Phonology 1, 312 0.01  112.04 ***  .12
## 8                                 Block 3, 936 0.00  287.91 ***  .36
## 9                   Systematicity:Block 3, 936 0.00  142.62 ***  .22
## 10                         Author:Block 3, 936 0.00   12.85 ***  .02
## 11                      Phonology:Block 3, 936 0.00   60.60 ***  .11
## 12           Systematicity:Author:Block 3, 936 0.00    6.28 ***  .01
## 13        Systematicity:Phonology:Block 3, 936 0.00     5.31 **  .01
## 14               Author:Phonology:Block 3, 936 0.00    8.67 ***  .02
## 15 Systematicity:Author:Phonology:Block 3, 936 0.00     4.93 ** .010
##    p.value
## 1   <.0001
## 2   <.0001
## 3   <.0001
## 4   <.0001
## 5   <.0001
## 6   <.0001
## 7   <.0001
## 8   <.0001
## 9   <.0001
## 10  <.0001
## 11  <.0001
## 12   .0003
## 13    .001
## 14  <.0001
## 15    .002
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Let’s see if things converge any better for individuation

Sim2.I.Std <- standardize(IndCorr ~ Systematicity * Author * Phonology + Block + (1|SimID), data= SimData, family= binomial)
Sim2.I.mod <- lmer(Sim2.I.Std$formula, Sim2.I.Std$data)

Sim2.afex.I <- mixed(Sim2.I.mod,
                   data = Sim2.I.Std$data, 
                   family = binomial,
                   control = glmerControl(optimizer = "Nelder_Mead"),
                   method = 'LRT',
                   progress = FALSE,
                   check_contrasts = FALSE)

## Formula (the first argument) converted to formula.

summary(Sim2.afex.I)  #Summary Table

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## IndCorr ~ Systematicity * Author * Phonology + Block + (1 | SimID)
##    Data: Sim2.I.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##  17562.7  17639.1  -8771.3  17542.7    15350 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4056 -0.7401 -0.3089  0.7258  3.6887 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  SimID  (Intercept) 0.08579  0.2929  
## Number of obs: 15360, groups:  SimID, 320
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -0.04830    0.02473   -1.95   0.0508 .  
## SystematicityA                    -0.38512    0.02484  -15.51  < 2e-16 ***
## AuthorM                           -0.21543    0.02476   -8.70  < 2e-16 ***
## PhonologyD                         0.66991    0.02508   26.71  < 2e-16 ***
## Block                              0.78107    0.01972   39.61  < 2e-16 ***
## SystematicityA:AuthorM            -0.12303    0.02474   -4.97 6.58e-07 ***
## SystematicityA:PhonologyD          0.06262    0.02473    2.53   0.0113 *  
## AuthorM:PhonologyD                -0.27555    0.02479  -11.12  < 2e-16 ***
## SystematicityA:AuthorM:PhonologyD -0.06127    0.02473   -2.48   0.0132 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SystmA AuthrM PhnlgD Block  SyA:AM SyA:PD AtM:PD
## SystemtctyA  0.018                                                 
## AuthorM     -0.023  0.011                                          
## PhonologyD   0.000 -0.053 -0.030                                   
## Block       -0.012 -0.091 -0.051  0.160                            
## SystmtcA:AM  0.007 -0.020  0.019 -0.003 -0.033                     
## SystmtcA:PD -0.038  0.000  0.002  0.019  0.013 -0.021              
## AthrM:PhnlD -0.020  0.010  0.005 -0.035 -0.066 -0.036  0.005       
## SystA:AM:PD  0.003 -0.019 -0.037  0.003 -0.017  0.002 -0.024  0.019

Sim2.afex.I  # Read Results from Here

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: IndCorr ~ Systematicity * Author * Phonology + Block + (1 | SimID)
## Data: $
## Data: Sim2.I.Std
## Data: data
## Df full model: 10
##                           Effect df       Chisq p.value
## 1                  Systematicity  1  182.67 ***  <.0001
## 2                         Author  1   68.63 ***  <.0001
## 3                      Phonology  1  388.13 ***  <.0001
## 4                          Block  1 1785.40 ***  <.0001
## 5           Systematicity:Author  1   23.86 ***  <.0001
## 6        Systematicity:Phonology  1      6.35 *     .01
## 7               Author:Phonology  1  105.94 ***  <.0001
## 8 Systematicity:Author:Phonology  1      6.08 *     .01
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(IndCorr ~ Block, data= SimData, f1))

##   Block IndCorr.Mean IndCorr.SD IndCorr.SE
## 1     1    0.2549479  0.4358887 0.06892005
## 2     2    0.4463542  0.4971785 0.07861083
## 3     3    0.5851563  0.4927592 0.07791207
## 4     4    0.6723958  0.4694007 0.07421877

do.call(data.frame, aggregate(IndCorr ~ Systematicity, data= SimData, f1))

##   Systematicity IndCorr.Mean IndCorr.SD IndCorr.SE
## 1             A    0.4128906  0.4923855 0.07785299
## 2             S    0.5665365  0.4955854 0.07835893

do.call(data.frame, aggregate(IndCorr ~ Author, data= SimData, f1))

##   Author IndCorr.Mean IndCorr.SD IndCorr.SE
## 1      M    0.4470052  0.4972160 0.07861675
## 2      N    0.5324219  0.4989802 0.07889570

do.call(data.frame, aggregate(IndCorr ~ Phonology, data= SimData, f1))

##   Phonology IndCorr.Mean IndCorr.SD IndCorr.SE
## 1         D    0.6250000  0.4841544 0.07655154
## 2         P    0.3544271  0.4783705 0.07563702

So that converged… good news

Now we just do it for Within-category error

Sim2.WIC.Std <- standardize(MonTrueCat ~ Systematicity * Author * Phonology + Block + (1|SimID), 
                            data= SimData, family= binomial)

Sim2.WIC.mod <- lmer(Sim2.WIC.Std$formula, Sim2.WIC.Std$data)

Sim2.afex.WIC <- mixed(Sim2.WIC.mod,
                   data = Sim2.WIC.Std$data, 
                   family = binomial,
                   control = glmerControl(optimizer = "Nelder_Mead"),
                   method = 'LRT',
                   progress = FALSE,
                   check_contrasts = FALSE)

## Formula (the first argument) converted to formula.

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.116122 (tol =
## 0.001, component 1)

summary(Sim2.afex.WIC)  #Summary Table

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: MonTrueCat ~ Systematicity * Author * Phonology + Block + (1 |  
##     SimID)
##    Data: Sim2.WIC.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##  18490.4  18566.8  -9235.2  18470.4    15350 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0213 -0.7318 -0.4779  0.9393  3.0839 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  SimID  (Intercept) 0.06292  0.2508  
## Number of obs: 15360, groups:  SimID, 320
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -0.57599    0.02297 -25.080  < 2e-16 ***
## SystematicityA                    -0.20945    0.02283  -9.173  < 2e-16 ***
## AuthorM                            0.15649    0.02282   6.857 7.03e-12 ***
## PhonologyD                        -0.44726    0.02291 -19.523  < 2e-16 ***
## Block                             -0.52562    0.01838 -28.605  < 2e-16 ***
## SystematicityA:AuthorM             0.05399    0.02281   2.367   0.0179 *  
## SystematicityA:PhonologyD          0.17046    0.02283   7.468 8.16e-14 ***
## AuthorM:PhonologyD                 0.18132    0.02283   7.943 1.97e-15 ***
## SystematicityA:AuthorM:PhonologyD -0.03446    0.02281  -1.511   0.1309    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SystmA AuthrM PhnlgD Block  SyA:AM SyA:PD AtM:PD
## SystemtctyA  0.016                                                 
## AuthorM     -0.052 -0.006                                          
## PhonologyD   0.072  0.006 -0.044                                   
## Block        0.103  0.039 -0.024  0.079                            
## SystmtcA:AM -0.006 -0.049  0.012 -0.006 -0.010                     
## SystmtcA:PD -0.002  0.060 -0.004  0.008 -0.034 -0.041              
## AthrM:PhnlD -0.045 -0.007  0.063 -0.052 -0.029  0.002 -0.004       
## SystA:AM:PD -0.004 -0.041  0.002 -0.005  0.007  0.062 -0.049  0.011

Sim2.afex.WIC  # Read Results from Here

## Warning: lme4 reported (at least) the following warnings for 'Block':
##   * Model failed to converge with max|grad| = 0.116122 (tol = 0.001, component 1)

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: MonTrueCat ~ Systematicity * Author * Phonology + Block + (1 | 
## Model:     SimID)
## Data: $
## Data: Sim2.WIC.Std
## Data: data
## Df full model: 10
##                           Effect df      Chisq p.value
## 1                  Systematicity  1  75.35 ***  <.0001
## 2                         Author  1  44.20 ***  <.0001
## 3                      Phonology  1 258.27 ***  <.0001
## 4                          Block  1 868.96 ***  <.0001
## 5           Systematicity:Author  1     5.56 *     .02
## 6        Systematicity:Phonology  1  51.65 ***  <.0001
## 7               Author:Phonology  1  58.22 ***  <.0001
## 8 Systematicity:Author:Phonology  1       2.27     .13
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(MonTrueCat ~ Block, data= SimData, f1))

##   Block MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1     1       0.5408854     0.4983905    0.07880245
## 2     2       0.4119792     0.4922555    0.07783242
## 3     3       0.3067708     0.4612135    0.07292426
## 4     4       0.2484375     0.4321631    0.06833099

do.call(data.frame, aggregate(MonTrueCat ~ Systematicity, data= SimData, f1))

##   Systematicity MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1             A       0.3303385     0.4703656    0.07437133
## 2             S       0.4236979     0.4941759    0.07813607

do.call(data.frame, aggregate(MonTrueCat ~ Author, data= SimData, f1))

##   Author MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1      M       0.4057292     0.4910646    0.07764413
## 2      N       0.3483073     0.4764650    0.07533573

do.call(data.frame, aggregate(MonTrueCat ~ Phonology, data= SimData, f1))

##   Phonology MonTrueCat.Mean MonTrueCat.SD MonTrueCat.SE
## 1         D       0.2830729     0.4505209    0.07123361
## 2         P       0.4709635     0.4991887    0.07892866

#Aggregate Data
Sim2Ind <- aggregate(IndCorr ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)
Sim2Cat <- aggregate(MonCatCorr ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)
Sim2WIC <- aggregate(MonTrueCat ~ SimID * Block * Systematicity * Author * Phonology, data = SimData, mean)

#Add a coding colum
Sim2Ind$Type <- "Individuation"
Sim2Cat$Type <- "Categorisation"
Sim2WIC$Type <- "Within-Category Error"

#Give columns the same names
colnames(Sim2Ind) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")
colnames(Sim2Cat) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")
colnames(Sim2WIC) <- c("SimID", "Block", "Systematicity", "Author", "Phonology", "Correctness", "Type")

#Rowbind into a single data frame
Sim2Agg <- rbind(Sim2Ind, Sim2Cat)

#Rename some stuff
Sim2Agg$Systematicity <- revalue(Sim2Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Sim2Agg$Author <- revalue(Sim2Agg$Author, c("M" = "Monaghan", "N" = "Nielsen"))
Sim2Agg$Phonology <- revalue(Sim2Agg$Phonology, c("P" = "Clustered", "D" = "Dispersed"))

Sim2Agg2 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type * Author * Phonology, data= Sim2Agg, f1))

colnames(Sim2Agg2) <- c("Systematicity", "Block", "Type", "Author", "Phonology", "Correctness", "SD", "SE")

Sim2Agg2 <- subset(Sim2Agg2, select = c("Block", "Systematicity", "Author", "Phonology", "Type", "Correctness", "SE"))


ggplot(data=Sim2Agg2, aes(x=Block, y= Correctness)) +
  geom_line(stat = "identity", 
            aes(col = Systematicity,
                linetype = Phonology), 
            size = 1.2) +
  geom_errorbar(aes(ymin = Correctness - SE,
                    ymax = Correctness + SE,
                    width = 0.1)) +
  labs(x="Block", y="Proportion Correct") +
  facet_grid(Type ~ Author) +
  theme(strip.text.y = element_text(angle = 0))+
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme_alan() +
  theme(legend.title=element_blank()) +
  scale_colour_manual(values = c("black", "#666666")) +
  scale_linetype_manual(values = c("dotted", "solid"))

#And then we need a graph of Within-category error

Sim2Agg3 <- do.call(data.frame, aggregate(Correctness ~ Systematicity * Block * Type * Author * Phonology, data = Sim2WIC, f1))
colnames(Sim2Agg3) <- c("Systematicity", "Block", "Type", "Author", "Phonology", "Correctness", "SD", "SE")

Sim2Agg3 <- subset(Sim2Agg3, select = c("Block", "Systematicity", "Author", "Phonology", "Type", "Correctness", "SE"))

Sim2Agg3$Systematicity <- revalue(Sim2Agg3$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Sim2Agg3$Author <- revalue(Sim2Agg3$Author, c("M" = "Monaghan", "N" = "Nielsen"))
Sim2Agg3$Phonology <- revalue(Sim2Agg3$Phonology, c("P" = "Clustered", "D" = "Dispersed"))


ggplot(data=Sim2Agg3, aes(x=Block, y= Correctness)) +
  geom_line(stat = "identity", 
            aes(col = Systematicity,
                linetype = Phonology), 
            size = 1.2) +
  geom_errorbar(aes(ymin = Correctness - SE,
                    ymax = Correctness + SE,
                    width = 0.1)) +
  labs(x="Block", y="Proportion of Within-Category Errors") +
  facet_wrap( ~ Author, nrow = 1) +
  theme(strip.text.y = element_text(angle = 0))+
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme_alan() +
  theme(legend.title=element_blank()) +
  scale_colour_manual(values = c("black", "#666666")) +
  scale_linetype_manual(values = c("dotted", "solid"))

Experiment Data

Now we can start looking at the experiment data

The first thing we want to do is take a look at the similarity of words to each other in all four of the experimental conditions

ICAgg <- aggregate(ICDist ~ ID * Phon * Sys, data = Exp1, mean)
colnames(ICAgg) <- c("ID", "Phonology", "Systematicity", "Distance" )
ICAgg$Type <- "Within-Class"

OCAgg <- aggregate(OCDist ~ ID * Phon * Sys, data = Exp1, mean)
colnames(OCAgg) <- c("ID", "Phonology", "Systematicity", "Distance" )
OCAgg$Type <- "Between-Class"

EucAgg <- rbind(OCAgg, ICAgg )

EucAgg$Systematicity <- revalue(EucAgg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
EucAgg$Phonology <- factor(EucAgg$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))
EucAgg$Type <- factor(EucAgg$Type, level = c("Within-Class", "Between-Class"))
EucAgg$Condition <- paste(EucAgg$Systematicity,EucAgg$Phonology)
EucAgg$Condition <- factor(EucAgg$Condition, 
                           level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"), 
                           labels = c("Clustered", " Clustered", "Dispersed", " Dispersed"))


ggplot(data=EucAgg, aes(x= Condition, y=Distance)) +
  geom_bar(position= position_dodge(1),
           aes(fill = Systematicity),
           stat = "identity", width = .6) +
  geom_jitter(height = 0, size = 1.2, colour = 'darkgray') +
  labs(x="", y="Euclidean Distance") +
  facet_wrap( ~ Type) +
  theme_alan() +
  theme(strip.text.y = element_text(hjust= 0)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c("black", "#666666")) +
  theme(legend.title=element_blank())

SO that’s Euclidean distances. Let’s start taking a look at the actual statistics

# Get counts of hits and false alarms for each participant
Exp1Hits <- subset(Exp1, RespType == "H")
Exp1FAs <- subset(Exp1, RespType == "FA")

HitAgg <- data.frame(xtabs(~ ID, data= Exp1Hits))
colnames(HitAgg) <- c("ID", "Hits")
FAAgg <- data.frame(xtabs(~ ID, data= Exp1FAs))
colnames(FAAgg) <- c("ID", "FAs")

#Append those into a single dataframe with condition data
Exp1Agg <- aggregate(Correctness ~ ID * Sys * Phon, data= Exp1, mean)
Exp1Agg <- Exp1Agg[order(Exp1Agg$ID),]

Exp1Agg <- cbind(Exp1Agg, HitAgg)
Exp1Agg <- cbind(Exp1Agg, FAAgg)

Exp1Agg <- subset(Exp1Agg, select = c(ID, Sys, Phon, Correctness, Hits, FAs))

#Calculate pHit and pFA
Exp1Agg$pHit <- Exp1Agg$Hits / 12
Exp1Agg$pFA <- Exp1Agg$FAs / 36

#Calculate D Prime
Exp1Agg$DPrime <- qnorm(Exp1Agg$pHit) - qnorm(Exp1Agg$pFA)
Exp1Agg$C <- 0.5 * ((qnorm(Exp1Agg$pHit) + qnorm(Exp1Agg$pFA)))

#This produces impossible values of INF becuase there are participants with 100% hits or 0% False Alarms- we need to rectify that
Exp1Agg$Hits <- mapvalues(Exp1Agg$Hits, from = c(12, 0), to = c(11.5, 0.5))

## The following `from` values were not present in `x`: 0

Exp1Agg$FAs <- mapvalues(Exp1Agg$FAs, from = c(36, 0), to = c(35.5, 0.5))

## The following `from` values were not present in `x`: 36

Exp1Agg$pHit <- Exp1Agg$Hits / 12
Exp1Agg$pFA <- Exp1Agg$FAs / 36
Exp1Agg$DPrime <- qnorm(Exp1Agg$pHit) - qnorm(Exp1Agg$pFA)
Exp1Agg$C <- 0.5 * ((qnorm(Exp1Agg$pHit) + qnorm(Exp1Agg$pFA)))



# Now we can plot the results

colnames(Exp1Agg) <- c("ID", "Systematicity", "Phonology", "Correctness", "Hits", "FAs", "pHit", "pFA", "DPrime", "C" )

Exp1Agg$Systematicity <- revalue(Exp1Agg$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Exp1Agg$Phonology <- factor(Exp1Agg$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))

Exp1Agg$Condition <- paste(Exp1Agg$Systematicity,Exp1Agg$Phonology)
Exp1Agg$Condition <- factor(Exp1Agg$Condition, 
                           level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"), 
                           labels = c("Clustered", " Clustered", "Dispersed", " Dispersed"))


ggplot(data=Exp1Agg, aes(x= Condition, y=DPrime)) +
  geom_bar(position= position_dodge(width = 1),
           aes(fill = Systematicity),
           stat = "summary", fun.y = "mean",
           width = .6) +
  geom_jitter(height = 0, size = 1.2, colour = 'darkgray') +
  labs(x="", y="D Prime Performance") +
  theme_alan() +
  theme(strip.text.y = element_text(hjust= 0)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c("black", "#666666")) +
  theme(legend.title=element_blank())

#And some stats on this data:

#unfortunately afex does not work with aggregated data formats, and d prime is always an aggregate measure, so we need to do something else

# Unfortunately I can't figure out how to get D Prime to work with a random effect- becaues D prime is inherent aggregated I cant' seem to add a by-participant random effect.
Exp1D.model <- glm(DPrime ~ Phonology * Systematicity , data = Exp1Agg)

summary(Exp1D.model)

## 
## Call:
## glm(formula = DPrime ~ Phonology * Systematicity, data = Exp1Agg)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.60362  -0.38476  -0.09698   0.44399   2.11637  
## 
## Coefficients:
##                                            Estimate Std. Error t value
## (Intercept)                                  0.4258     0.2348   1.813
## PhonologyDispersed                           0.1768     0.3321   0.532
## SystematicitySystematic                      1.3899     0.3321   4.186
## PhonologyDispersed:SystematicitySystematic  -0.9515     0.4696  -2.026
##                                            Pr(>|t|)    
## (Intercept)                                0.078146 .  
## PhonologyDispersed                         0.597680    
## SystematicitySystematic                    0.000175 ***
## PhonologyDispersed:SystematicitySystematic 0.050226 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.5513871)
## 
##     Null deviance: 31.364  on 39  degrees of freedom
## Residual deviance: 19.850  on 36  degrees of freedom
## AIC: 95.488
## 
## Number of Fisher Scoring iterations: 2

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(20))

do.call(data.frame, aggregate(DPrime ~ Systematicity, data= Exp1Agg, f1))

##   Systematicity DPrime.Mean DPrime.SD DPrime.SE
## 1     Arbitrary   0.5141776 0.6750417 0.1509439
## 2    Systematic   1.4283827 0.8690203 0.1943188

do.call(data.frame, aggregate(DPrime ~ Phonology, data= Exp1Agg, f1))

##   Phonology DPrime.Mean DPrime.SD DPrime.SE
## 1 Clustered   1.1207379 1.1079829 0.2477525
## 2 Dispersed   0.8218224 0.6132734 0.1371321

So nowe we can start doing the real meaty stats for the Experiment - an analysis that includes performance on the different trial types

Exp1.Std <- standardize(Correctness ~ Sys * Phon * TrialType + (1 + TrialType|ID), 
                            data= Exp1, family= binomial)

Exp1.Mod <- lmer(Exp1.Std$formula, Exp1.Std$data)

Exp1.afex <- mixed(Exp1.Mod,
                   data = Exp1.Std$data, 
                   family = binomial,
                   control = glmerControl(optimizer = "Nelder_Mead"),
                   method = 'LRT',
                   progress = FALSE)

## Formula (the first argument) converted to formula.

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00252673 (tol =
## 0.001, component 1)

summary(Exp1.afex)  #Summary Table

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correctness ~ Sys * Phon * TrialType + (1 + TrialType | ID)
##    Data: Exp1.Std$data
## Control: glmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##   2253.0   2353.1  -1108.5   2217.0     1902 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4681 -0.9285  0.4001  0.7383  1.8966 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr       
##  ID     (Intercept) 0.1749   0.4182              
##         TrialTypeIC 0.4664   0.6829   -0.04      
##         TrialTypeOC 0.1673   0.4090    0.36 -0.70
## Number of obs: 1920, groups:  ID, 40
## 
## Fixed effects:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             0.71216    0.08850   8.047 8.48e-16 ***
## SysA                   -0.31852    0.08817  -3.613 0.000303 ***
## PhonD                  -0.12327    0.08735  -1.411 0.158199    
## TrialTypeIC            -0.65352    0.13562  -4.819 1.45e-06 ***
## TrialTypeOC             0.52334    0.10377   5.043 4.58e-07 ***
## SysA:PhonD              0.17400    0.08735   1.992 0.046369 *  
## SysA:TrialTypeIC        0.60921    0.13550   4.496 6.92e-06 ***
## SysA:TrialTypeOC       -0.51127    0.10293  -4.967 6.79e-07 ***
## PhonD:TrialTypeIC       0.14023    0.13491   1.039 0.298608    
## PhonD:TrialTypeOC      -0.12244    0.10107  -1.211 0.225731    
## SysA:PhonD:TrialTypeIC -0.28260    0.13493  -2.094 0.036224 *  
## SysA:PhonD:TrialTypeOC  0.20921    0.10114   2.069 0.038588 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SysA   PhonD  TrlTIC TrlTOC SyA:PD SA:TTI SA:TTO PD:TTI
## SysA        -0.108                                                        
## PhonD       -0.053  0.054                                                 
## TrialTypeIC -0.043  0.059  0.029                                          
## TrialTypeOC  0.136 -0.080 -0.044 -0.560                                   
## SysA:PhonD   0.056 -0.052 -0.089 -0.033  0.048                            
## SysA:TrlTIC  0.059 -0.036 -0.032 -0.054  0.061  0.029                     
## SysA:TrlTOC -0.079  0.135  0.047  0.061 -0.220 -0.044 -0.565              
## PhnD:TrlTIC  0.030 -0.033 -0.028 -0.025  0.032  0.049  0.024 -0.032       
## PhnD:TrlTOC -0.046  0.049  0.124  0.033 -0.115 -0.066 -0.033  0.120 -0.568
## SyA:PD:TTIC -0.034  0.030  0.049  0.024 -0.033 -0.028 -0.024  0.032 -0.046
## SyA:PD:TTOC  0.050 -0.046 -0.066 -0.034  0.122  0.125  0.033 -0.115  0.053
##             PD:TTO SA:PD:TTI
## SysA                        
## PhonD                       
## TrialTypeIC                 
## TrialTypeOC                 
## SysA:PhonD                  
## SysA:TrlTIC                 
## SysA:TrlTOC                 
## PhnD:TrlTIC                 
## PhnD:TrlTOC                 
## SyA:PD:TTIC  0.053          
## SyA:PD:TTOC -0.187 -0.568

Exp1.afex  # Read Results from Here

## Warning: lme4 reported (at least) the following warnings for 'Sys':
##   * Model failed to converge with max|grad| = 0.00252673 (tol = 0.001, component 1)

## Mixed Model Anova Table (Type 3 tests, LRT-method)
## 
## Model: Correctness ~ Sys * Phon * TrialType + (1 + TrialType | ID)
## Data: $
## Data: Exp1.Std
## Data: data
## Df full model: 18
##               Effect df     Chisq p.value
## 1                Sys  1 11.56 ***   .0007
## 2               Phon  1      1.95     .16
## 3          TrialType  2 23.83 ***  <.0001
## 4           Sys:Phon  1    3.79 +     .05
## 5      Sys:TrialType  2 22.62 ***  <.0001
## 6     Phon:TrialType  2      1.58     .45
## 7 Sys:Phon:TrialType  2    4.94 +     .08
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(40))
do.call(data.frame, aggregate(Correctness ~ TrialType, data= Exp1, f1))

##   TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1        IC        0.5166667      0.5002435     0.07909544
## 2        OC        0.7218750      0.4483089     0.07088386
## 3         T        0.6770833      0.4680790     0.07400979

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(20))
do.call(data.frame, aggregate(Correctness ~ Sys, data= Exp1, f1))

##   Sys Correctness.Mean Correctness.SD Correctness.SE
## 1   A        0.5916667      0.4917816     0.10996571
## 2   S        0.7270833      0.4456906     0.09965944

do.call(data.frame, aggregate(Correctness ~ Phon, data= Exp1, f1))

##   Phon Correctness.Mean Correctness.SD Correctness.SE
## 1    D        0.6447917      0.4788259      0.1070687
## 2    P        0.6739583      0.4690071      0.1048732

#we can use lsmeans for our post-hoc tests

# Post hoc test of trial type
(Exp1.LS <- lsmeans(Exp1.afex, "TrialType"))

## NOTE: Results may be misleading due to involvement in interactions

##  TrialType     lsmean        SE df  asymp.LCL asymp.UCL
##  IC        0.05864515 0.1587193 NA -0.2524390 0.3697293
##  OC        1.23549550 0.1452580 NA  0.9507950 1.5201959
##  T         0.84234063 0.1406081 NA  0.5667539 1.1179274
## 
## Results are averaged over the levels of: Sys, Phon 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

pairs(Exp1.LS)

##  contrast   estimate        SE df z.ratio p.value
##  IC - OC  -1.1768503 0.2119451 NA  -5.553  <.0001
##  IC - T   -0.7836955 0.2298352 NA  -3.410  0.0019
##  OC - T    0.3931549 0.1730689 NA   2.272  0.0598
## 
## Results are averaged over the levels of: Sys, Phon 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates

contrast(pairs(Exp1.LS), by = NULL, adjust = "holm")

##  contrast         estimate         SE df z.ratio p.value
##  IC - OC effect -0.6543867 0.15232667 NA  -4.296  <.0001
##  IC - T effect  -0.2612318 0.07661174 NA  -3.410  0.0007
##  OC - T effect   0.9156185 0.16790683 NA   5.453  <.0001
## 
## Results are averaged over the levels of: Sys, Phon 
## P value adjustment: holm method for 3 tests

#Post hoc test of Systematicity x Trial Type Interaction

(Exp1.LS.2 <- lsmeans(Exp1.afex, c("TrialType", "Sys")))

## NOTE: Results may be misleading due to involvement in interactions

##  TrialType Sys     lsmean        SE df   asymp.LCL asymp.UCL
##  IC        A    0.3493356 0.2227875 NA -0.08731996 0.7859911
##  OC        A    0.4057044 0.1805994 NA  0.05173604 0.7596727
##  T         A    0.4258796 0.1851401 NA  0.06301162 0.7887475
##  IC        S   -0.2320453 0.2265179 NA -0.67601216 0.2119216
##  OC        S    2.0652866 0.2262789 NA  1.62178807 2.5087852
##  T         S    1.2588017 0.2097993 NA  0.84760271 1.6700007
## 
## Results are averaged over the levels of: Phon 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95

pairs(Exp1.LS.2)

##  contrast       estimate        SE df z.ratio p.value
##  IC,A - OC,A -0.05636879 0.2819699 NA  -0.200  1.0000
##  IC,A - T,A  -0.07654398 0.3155035 NA  -0.243  0.9999
##  IC,A - IC,S  0.58138085 0.3179968 NA   1.828  0.4474
##  IC,A - OC,S -1.71595106 0.3175645 NA  -5.403  <.0001
##  IC,A - T,S  -0.90946614 0.3060855 NA  -2.971  0.0352
##  OC,A - T,A  -0.02017519 0.2126513 NA  -0.095  1.0000
##  OC,A - IC,S  0.63774964 0.2896901 NA   2.201  0.2369
##  OC,A - OC,S -1.65958226 0.2885083 NA  -5.752  <.0001
##  OC,A - T,S  -0.85309735 0.2768714 NA  -3.081  0.0252
##  T,A - IC,S   0.65792483 0.2925153 NA   2.249  0.2154
##  T,A - OC,S  -1.63940708 0.2925126 NA  -5.605  <.0001
##  T,A - T,S   -0.83292216 0.2783930 NA  -2.992  0.0331
##  IC,S - OC,S -2.29733191 0.3158595 NA  -7.273  <.0001
##  IC,S - T,S  -1.49084699 0.3333550 NA  -4.472  0.0001
##  OC,S - T,S   0.80648492 0.2703870 NA   2.983  0.0340
## 
## Results are averaged over the levels of: Phon 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 6 estimates

contrast(pairs(Exp1.LS.2), by = NULL, adjust = "holm")

##  contrast             estimate        SE df z.ratio p.value
##  IC,A - OC,A effect  0.5348414 0.2785234 NA   1.920  0.2741
##  IC,A - T,A effect   0.5146662 0.2928471 NA   1.757  0.3154
##  IC,A - IC,S effect  1.1725910 0.2858534 NA   4.102  0.0004
##  IC,A - OC,S effect -1.1247409 0.2359900 NA  -4.766  <.0001
##  IC,A - T,S effect  -0.3182560 0.2092398 NA  -1.521  0.3848
##  OC,A - T,A effect   0.5710350 0.2208227 NA   2.586  0.0583
##  OC,A - IC,S effect  1.2289598 0.2860693 NA   4.296  0.0002
##  OC,A - OC,S effect -1.0683721 0.2353853 NA  -4.539  0.0001
##  OC,A - T,S effect  -0.2618872 0.2099425 NA  -1.247  0.4245
##  T,A - IC,S effect   1.2491350 0.3087585 NA   4.046  0.0005
##  T,A - OC,S effect  -1.0481969 0.2637880 NA  -3.974  0.0006
##  T,A - T,S effect   -0.2417120 0.2382689 NA  -1.014  0.4245
##  IC,S - OC,S effect -1.7061217 0.2990226 NA  -5.706  <.0001
##  IC,S - T,S effect  -0.8996368 0.3098738 NA  -2.903  0.0259
##  OC,S - T,S effect   1.3976951 0.2893984 NA   4.830  <.0001
## 
## Results are averaged over the levels of: Phon 
## P value adjustment: holm method for 15 tests

do.call(data.frame, aggregate(Correctness ~ TrialType, data= subset(Exp1, Sys == "A"), f1))

##   TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1        IC        0.5791667      0.4947246      0.1106238
## 2        OC        0.5937500      0.4916447      0.1099351
## 3         T        0.6000000      0.4909218      0.1097734

do.call(data.frame, aggregate(Correctness ~ TrialType, data= subset(Exp1, Sys == "S"), f1))

##   TrialType Correctness.Mean Correctness.SD Correctness.SE
## 1        IC        0.4541667      0.4989354     0.11156535
## 2        OC        0.8500000      0.3574440     0.07992690
## 3         T        0.7541667      0.4314801     0.09648187

So those are the stats as they stand - I think they all make pretty good sense, assuming I’ve done and interpreted them correctly! Now lets visualise them

f1 <- function(x) c(Mean = mean(x), SD = sd(x), SE = sd(x)/sqrt(10))
Exp1Agg.2 <- do.call(data.frame, aggregate(Correctness ~ TrialType * Sys * Phon, data= Exp1, f1))

colnames(Exp1Agg.2) <- c("TrialType", "Systematicity", "Phonology", "Correctness", "SD", "SE" )

Exp1Agg.2$Systematicity <- revalue(Exp1Agg.2$Systematicity, c("A" = "Arbitrary", "S" = "Systematic"))
Exp1Agg.2$Phonology <- factor(Exp1Agg.2$Phonology, level = c("P", "D"), labels = c("Clustered", "Dispersed"))
Exp1Agg.2$TrialType <- factor(Exp1Agg.2$TrialType, level = c("T", "IC", "OC"), labels = c("Target", "In-Class", "Out-of-Class"))

Exp1Agg.2$Condition <- paste(Exp1Agg.2$Systematicity,Exp1Agg.2$Phonology)
Exp1Agg.2$Condition <- factor(Exp1Agg.2$Condition, 
                           level = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"), 
                           labels = c("Arbitrary Clustered", "Systematic Clustered", "Arbitrary Dispersed", "Systematic Dispersed"))


ggplot(data=Exp1Agg.2, aes(x= TrialType, y=Correctness, fill = Systematicity)) +
  geom_bar(position= position_dodge(.7),
           stat = "identity", fun.y = "mean",
           width = .5,
           aes(colour = Phonology, linetype = Phonology),
           size = 1.5) +
  geom_errorbar(position = position_dodge(.7),
                aes(ymin = Correctness - SE,
                    ymax = Correctness + SE,
                    width = 0.2,
                    linetype = Phonology),
                size = 1.2,
                colour = 'black') +
  labs(x="", y="Proportion Correct") +
  theme_alan() +
  theme(strip.text.y = element_text(hjust= 0)) +
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c("black", "#666666")) +
  theme(legend.title=element_blank()) +
  scale_color_manual(values = c("gray70", "ghostwhite"))

## Warning: Ignoring unknown parameters: fun.y

Correlation between phonological feature edit distance and confusability

ConfxEuc <- read.csv("F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/ConfxEuc.csv")
ConfxEuc <- subset(ConfxEuc, select = c(Phon1, Phon2, Source, Type, Euc, Confusion))
colnames(ConfxEuc) <- c("Phoneme1","Phoneme2", "Source", "Type", "Euclidean", "Confusability")
ConfxEuc$Pair <- paste(ConfxEuc$Phoneme1, ConfxEuc$Phoneme2, sep = ',')

(ConfxEuc.corr <- cor.test(ConfxEuc$Euclidean, ConfxEuc$Confusability))

## 
##  Pearson's product-moment correlation
## 
## data:  ConfxEuc$Euclidean and ConfxEuc$Confusability
## t = -0.68922, df = 22, p-value = 0.4979
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5183772  0.2740896
## sample estimates:
##        cor 
## -0.1453815

ggplot(data=ConfxEuc, aes(x= Euclidean, y=Confusability)) +
  geom_point(aes(colour= Source, shape = Type), size = 2) +
  geom_text(aes(label = Pair), hjust = 1.5, size = 3) +
  annotate(geom = "text", x= 3.5, y= 5, label = "r= -0.14, p = 0.498") +
  geom_smooth(method = 'lm', col = 'black', se= FALSE)+
  labs(x="Euclidean Distance", y="Confusability") +
  theme_alan() +
  theme(strip.text.y = element_text(hjust= 0)) +
  theme(axis.text.x = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme(legend.title=element_blank())

Phonological Disperson Manuscript Statistics

Alan Nielsen

November 20, 2017

Experiment Data

Correlation between phonological feature edit distance and confusability