# Introduction

In this document we’re going to explore data from Westbury et al. (2018) in an attempt to take a look at the “neutrality” of the conventional consonants and vowels used in Nielsen et al. (this manuscript).
This raw data was obtained from David Sidhu, one of the authors of Westbury et al. (2018) for us to explore. Thanks to David, Chris, and those others involved.
First we’ll read in the data and load some libraries etc (note we’re loading in the fraction of the total data from that experiment most germane to our purposes)
library(lme4)
library(plyr)
library(ggplot2)
library(afex)
library(emmeans)
library(ggthemes)
library(tidyverse)
library(kableExtra)
library(Hmisc)
library(binom)
library(Rmisc)
library(magick)
library(webshot)
library(magrittr)
library(multcomp)
#' Complete ggplot2 theme with white panels and light grey grid lines.
#'
#' Every theme element is set explicitly and the result is flagged as a
#' complete theme (`complete = TRUE`).
#'
#' @param base_size Base font size. Half of this value (`half_line`) drives
#'   the text margins, tick length, panel spacing and plot margin.
#' @param base_family Font family applied to the root `text` element.
#' @return The object returned by `theme()`.
theme_alan <- function(base_size = 12, base_family = "") {
  # All spacing below is expressed relative to half the base font size.
  # (The unused `ggthemes_data$few` colour lookups were removed: nothing in
  # the theme referenced them.)
  half_line <- base_size / 2
  theme(
    # Root elements that the more specific elements inherit from
    line = element_line(
      colour = "black", size = 0.5, linetype = 1, lineend = "butt"
    ),
    rect = element_rect(
      fill = "white", colour = "black", size = 0.5, linetype = 1
    ),
    text = element_text(
      family = base_family, face = "plain", colour = "black",
      size = base_size, lineheight = 0.9, hjust = 0.5, vjust = 0.5,
      angle = 0, margin = margin(), debug = FALSE
    ),
    # Axes
    axis.line = element_blank(),
    axis.line.x = NULL,
    axis.line.y = NULL,
    axis.text = element_text(size = rel(0.8), colour = "grey30"),
    axis.text.x = element_text(
      margin = margin(t = 0.8 * half_line / 2), vjust = 1
    ),
    axis.text.x.top = element_text(
      margin = margin(b = 0.8 * half_line / 2), vjust = 0
    ),
    axis.text.y = element_text(
      margin = margin(r = 0.8 * half_line / 2), hjust = 1
    ),
    axis.text.y.right = element_text(
      margin = margin(l = 0.8 * half_line / 2), hjust = 0
    ),
    axis.ticks = element_line(colour = "grey20"),
    axis.ticks.length = unit(half_line / 2, "pt"),
    axis.title.x = element_text(margin = margin(t = half_line), vjust = 1),
    axis.title.x.top = element_text(
      margin = margin(b = half_line), vjust = 0
    ),
    axis.title.y = element_text(
      angle = 90, margin = margin(r = half_line), vjust = 1
    ),
    axis.title.y.right = element_text(
      angle = -90, margin = margin(l = half_line), vjust = 0
    ),
    # Legend
    legend.background = element_rect(colour = NA),
    legend.spacing = unit(0.4, "cm"),
    legend.spacing.x = NULL,
    legend.spacing.y = NULL,
    legend.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"),
    legend.key = element_rect(fill = "white", colour = NA),
    legend.key.size = unit(1.2, "lines"),
    legend.key.height = NULL,
    legend.key.width = NULL,
    legend.text = element_text(size = rel(0.8)),
    legend.text.align = NULL,
    legend.title = element_text(hjust = 0),
    legend.title.align = NULL,
    legend.position = "right",
    legend.direction = NULL,
    legend.justification = "center",
    legend.box = NULL,
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.background = element_blank(),
    legend.box.spacing = unit(0.4, "cm"),
    # Panels
    panel.background = element_rect(fill = "white", colour = NA),
    panel.border = element_rect(fill = NA, colour = "grey20"),
    panel.grid.major = element_line(colour = "grey92"),
    panel.grid.minor = element_line(colour = "grey92", size = 0.25),
    panel.spacing = unit(half_line, "pt"),
    panel.spacing.x = NULL,
    panel.spacing.y = NULL,
    panel.ontop = FALSE,
    # Facet strips
    strip.background = element_rect(fill = "NA", colour = "NA"),
    strip.text = element_text(colour = "grey10", size = rel(0.8)),
    strip.text.x = element_text(
      margin = margin(t = half_line, b = half_line)
    ),
    strip.text.y = element_text(
      angle = 0, margin = margin(l = half_line, r = half_line)
    ),
    strip.placement = "inside",
    strip.placement.x = NULL,
    strip.placement.y = NULL,
    strip.switch.pad.grid = unit(0.1, "cm"),
    strip.switch.pad.wrap = unit(0.1, "cm"),
    # Whole-plot elements
    plot.background = element_rect(colour = "white"),
    plot.title = element_text(
      size = rel(1.2), hjust = 0, vjust = 1,
      margin = margin(b = half_line * 1.2)
    ),
    plot.subtitle = element_text(
      size = rel(0.9), hjust = 0, vjust = 1,
      margin = margin(b = half_line * 0.9)
    ),
    plot.caption = element_text(
      size = rel(0.9), hjust = 1, vjust = 1,
      margin = margin(t = half_line * 0.9)
    ),
    plot.margin = margin(half_line, half_line, half_line, half_line),
    complete = TRUE
  )
}
# Read the raw Westbury et al. (2018) data export.
# NOTE(review): this is an absolute, user-specific Windows path; the script
# only runs unchanged on a machine where that file exists.
WestData <- read.csv(
  file = "C:/Users/Alan Nielsen/Google Drive/Publications/Motivated vs Conventional Systematicity/Data/WestburyDataFull.csv"
)
# Cutting down the dataset to only words that use our phonemes of interest.
# Key: label = unicode escape = IPA symbol = column code in the Westbury data
# Vowels
# I   = "\u026A" = ɪ = IH
# UH  = "\u028C" = ʌ = UX
# EH  = "\u025B" = ɛ = EH
# UGH = "\u028A" = ʊ = UH   BUT NOT FOUND IN DATA
# ee  = "i"      = i = IY
# ew  = "\u028F" = ʏ = NOT USED IN WESTBURY
# ah  = "\u0251" = ɑ = NOT USED IN WESTBURY
# aw  = "\u0252" = ɒ = AA
# Consonants
# th-  "\u03B8" = θ = T
# thh- "\u00F0" = ð = xxxD  - BUT NOT FOUND IN DATA
# sh-  "\u0283" = ʃ = S
# dz-  "\u02A4" = ʤ = J     - BUT NOT FOUND IN DATA
# p, t, m, n - all standard

# Lookup vectors for mapvalues(). `fromlist` and `tolist` are parallel:
# Westbury column code -> IPA symbol. The ð column is named `xxxD` in the data
# (it was previously listed here as "D", which could never match).
fromlist <- c("IH", "UX", "EH", "IY", "AA", "T", "xxxD", "S")
tolist <- c(
  "\u026A", "\u028C", "\u025B", "i", "\u0252",
  "\u03B8", "\u00F0", "\u0283"
)

# `fromtype`, `totype1` and `totype2` are parallel: column code -> mapping
# (Iconic / Conventional) and column code -> phoneme class (Vowel / Consonant).
fromtype <- c(fromlist, "p", "t", "m", "n")
totype1 <- c(
  "Conventional", "Conventional", "Conventional", "Iconic", "Iconic",
  "Conventional", "Conventional", "Conventional", rep("Iconic", 4)
)
totype2 <- c(rep("Vowel", 5), rep("Consonant", 7))
# We only use the zROUND.zSHARP and zLARGE.zSMALL columns of the actual data
# here, so cut the data frame down to the word, the phoneme indicator columns
# and those two metrics. Words containing none of our phonemes of interest are
# then dropped.
# Columns are picked by name (rather than by position 2:13) so the row filter
# cannot silently drift if the column selection is ever edited.
WestTrim <- local({
  consonant_cols <- c("p", "t", "m", "n", "T", "xxxD", "S")
  vowel_cols <- c("IH", "UX", "EH", "IY", "AA")
  metric_cols <- c("zLARGE.zSMALL", "zROUND.zSHARP")

  trimmed <- WestData[, c("PHONO", consonant_cols, vowel_cols, metric_cols)]

  # Sum of the phoneme indicator columns for each word
  n_target <- rowSums(trimmed[, c(consonant_cols, vowel_cols)])

  # Keep words with a non-zero sum; rows with a missing sum are dropped,
  # exactly as subset() would drop them.
  trimmed[!is.na(n_target) & n_target != 0, , drop = FALSE]
})
We’ve loaded in the data now- the full dataset is very large and includes many associations between the tested words and semantic dimensions, but what we’re interested in are Shape (Jagged vs. Curvy - Exp 1 and 2) and Size (Large vs. Small- Exp)
As the quickest way of exploring this data, we’ve taken a single column for each of these.
For shape, this is the difference between the Z score for Curved and the z Score for Jagged associations - so a high positive score in this column means the word was highly associated with curviness, while a low negative score means that the word was highly associated with jaggedness. Scores near zero reflect words associated with neither curviness nor jaggedness.
For Size, the data is similar- a high positive score means the word was associated with largeness, and a negative score means the word was associated with smallness.
The actual models reported in Westbury et al. use only significant model predictors- thus individual phonemes could not be extracted from the results of the paper. So, for the purposes of this analysis, we’re simply going to use averages of these z-score differences. For example if we want to know whether /t/ is jagged or curvy, we will take the average ZScore difference for Shape in all words containing /t/. Thus, the approach we’re taking here does not make any assumptions about statistical significance.
First we’ll take a look at Experiment 1
##Experiment 1
In Experiment 1, we compared the learnability of artificial lexica that were either:
- Iconic (m, n = curvy; t, p = jagged)
- Counter-Iconic (m, n = jagged; t, p = curvy)
- Conventional (Θ, ð = jagged; ʃ, ʒ = curved, or vice-versa, counterbalanced between participants)
- Mixed (m, Θ = jagged; p, ʃ = curvy; this is one example of a possible configuration, of which there were many)
For our purposes, the important consideration is just the relative ranking of the full 8 consonants for Shape, so let’s calculate that
We should note before starting that Westbury et al. did not test all of the consonants used in this experiment. We will therefore only be testing the consonants p, t, m, n, Θ, ʃ, ʒ (that is, we will not be testing ð); in the notation of Westbury et al.’s data file, these phonemes are represented as p, t, m, n, T, S, Z. The vowels used in Experiment 1 (ɛ and ʌ) are represented as EH and UX respectively, with UH not actually being included in the data.
# Reshape to long format: the phoneme columns p..AA are stacked into a
# `Phoneme` name column and a `Contains` value column.
WestTrimLong <- pivot_longer(
  WestTrim,
  cols = p:AA,
  names_to = "Phoneme",
  values_to = "Contains"
)
# Per-phoneme summary (via summarySE) of the size metric, computed over the
# rows where the word contains that phoneme (Contains == 1).
Westbury.Size <-
  WestTrimLong %>%
  subset(Contains == 1) %>%
  summarySE(measurevar = "zLARGE.zSMALL", groupvars = "Phoneme")

# Same summary for the shape metric.
Westbury.Shape <-
  WestTrimLong %>%
  subset(Contains == 1) %>%
  summarySE(measurevar = "zROUND.zSHARP", groupvars = "Phoneme")

# The two summaries are glued together column-wise below, so their rows must
# describe the same phonemes in the same order; fail loudly if they do not.
stopifnot(identical(
  as.character(Westbury.Size$Phoneme),
  as.character(Westbury.Shape$Phoneme)
))

Westbury.Summary <- data.frame(
  "Phoneme" = Westbury.Size$Phoneme,
  "Large-Mean" = Westbury.Size$zLARGE.zSMALL,
  "Large-SE" = Westbury.Size$se,
  "Round-Mean" = Westbury.Shape$zROUND.zSHARP,
  "Round-SE" = Westbury.Shape$se,
  check.names = FALSE # keep the hyphenated column names as written
)

# Type and Mapping are derived from the raw column codes, so the Phoneme
# column is converted to IPA symbols last.
Westbury.Summary$Type <-
  mapvalues(Westbury.Summary$Phoneme, from = fromtype, to = totype2)
Westbury.Summary$Mapping <-
  mapvalues(Westbury.Summary$Phoneme, from = fromtype, to = totype1)
Westbury.Summary$Phoneme <-
  mapvalues(Westbury.Summary$Phoneme, from = fromlist, to = tolist)
# Overall summary table: every phoneme with both metrics, numbers rounded to
# three decimals.
Westbury.Summary %>%
  dplyr::select(Phoneme, Type:Mapping, 2:5) %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  knitr::kable(caption = "Westbury - Phonemes Table") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Large-Mean | Large-SE | Round-Mean | Round-SE |
|---|---|---|---|---|---|---|
| ɒ | Vowel | Iconic | 0.370 | 0.096 | 0.293 | 0.096 |
| ɛ | Vowel | Conventional | -0.602 | 0.051 | -0.518 | 0.062 |
| ɪ | Vowel | Conventional | -0.278 | 0.038 | -0.661 | 0.040 |
| i | Vowel | Iconic | -0.184 | 0.042 | 0.101 | 0.045 |
| m | Consonant | Iconic | -0.178 | 0.042 | 0.423 | 0.042 |
| n | Consonant | Iconic | 0.040 | 0.032 | 0.093 | 0.031 |
| p | Consonant | Iconic | -0.566 | 0.049 | 0.249 | 0.055 |
| ʃ | Consonant | Conventional | 0.746 | 0.075 | 0.341 | 0.085 |
| t | Consonant | Iconic | -0.292 | 0.034 | -1.264 | 0.030 |
| θ | Consonant | Conventional | 0.728 | 0.102 | -0.998 | 0.077 |
| ʌ | Vowel | Conventional | -0.304 | 0.040 | 0.036 | 0.043 |
##Consonants x Shape
# Consonants only, shape metric, ordered by ascending Round-Mean.
Westbury.Summary.ConsShape <-
  Westbury.Summary %>%
  dplyr::filter(Type == "Consonant") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type:Mapping, `Round-Mean`, `Round-SE`) %>%
  arrange(`Round-Mean`)

Westbury.Summary.ConsShape %>%
  knitr::kable(caption = "Westbury - Consonants x Shape") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Round-Mean | Round-SE |
|---|---|---|---|---|
| t | Consonant | Iconic | -1.264 | 0.030 |
| θ | Consonant | Conventional | -0.998 | 0.077 |
| n | Consonant | Iconic | 0.093 | 0.031 |
| p | Consonant | Iconic | 0.249 | 0.055 |
| ʃ | Consonant | Conventional | 0.341 | 0.085 |
| m | Consonant | Iconic | 0.423 | 0.042 |
##Vowels x Size
# Vowels only, size metric, ordered by ascending Large-Mean.
Westbury.Summary %>%
  dplyr::filter(Type == "Vowel") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type:Mapping, `Large-Mean`, `Large-SE`) %>%
  arrange(`Large-Mean`) %>%
  knitr::kable(caption = "Westbury - Vowels x Size") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Large-Mean | Large-SE |
|---|---|---|---|---|
| ɛ | Vowel | Conventional | -0.602 | 0.051 |
| ʌ | Vowel | Conventional | -0.304 | 0.040 |
| ɪ | Vowel | Conventional | -0.278 | 0.038 |
| i | Vowel | Iconic | -0.184 | 0.042 |
| ɒ | Vowel | Iconic | 0.370 | 0.096 |
##Consonants x Size
# Consonants only, size metric, ordered by ascending Large-Mean.
Westbury.Summary %>%
  dplyr::filter(Type == "Consonant") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type:Mapping, `Large-Mean`, `Large-SE`) %>%
  arrange(`Large-Mean`) %>%
  knitr::kable(caption = "Westbury - Consonants x Size") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Large-Mean | Large-SE |
|---|---|---|---|---|
| p | Consonant | Iconic | -0.566 | 0.049 |
| t | Consonant | Iconic | -0.292 | 0.034 |
| m | Consonant | Iconic | -0.178 | 0.042 |
| n | Consonant | Iconic | 0.040 | 0.032 |
| θ | Consonant | Conventional | 0.728 | 0.102 |
| ʃ | Consonant | Conventional | 0.746 | 0.075 |
##Vowels x Shape
# Vowels only, shape metric, ordered by ascending Round-Mean.
Westbury.Summary %>%
  dplyr::filter(Type == "Vowel") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type:Mapping, `Round-Mean`, `Round-SE`) %>%
  arrange(`Round-Mean`) %>%
  knitr::kable(caption = "Westbury - Vowels x Shape") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Round-Mean | Round-SE |
|---|---|---|---|---|
| ɪ | Vowel | Conventional | -0.661 | 0.040 |
| ɛ | Vowel | Conventional | -0.518 | 0.062 |
| ʌ | Vowel | Conventional | 0.036 | 0.043 |
| i | Vowel | Iconic | 0.101 | 0.045 |
| ɒ | Vowel | Iconic | 0.293 | 0.096 |
library(lme4)
# No need for mixed models or anything fancy here. A simple glm will do it.
# Shape model: shape score regressed on phoneme identity, using only the rows
# where the word contains the phoneme.
WestTrimLong$Phoneme <- factor(WestTrimLong$Phoneme)
WestModel.Shape <- glm(
  zROUND.zSHARP ~ Phoneme,
  data = subset(WestTrimLong, Contains == 1)
)

TableReplace5 <-
  summary(WestModel.Shape)$coefficients %>%
  data.frame() %>%
  tibble::rownames_to_column(var = "FixedEffect") %>%
  `colnames<-`(c("Phoneme", "Estimate", "SE", "t", "p")) %>%
  # Label small p-values on the raw values, before rounding, so that nothing
  # at or above 0.001 is reported as "<0.001" and nothing below it is rounded
  # up to 0.001. (Replaces the deprecated mutate_at(..., funs(...)) idiom.)
  mutate(p = ifelse(p < 0.001, "<0.001", sprintf("%.3f", p))) %>%
  mutate_if(is.numeric, round, digits = 3)

# Coefficient names come back with the term name prepended ("Phoneme<code>");
# strip that prefix, then re-attach the Type / Mapping labels and IPA symbols.
TableReplace5$Phoneme <- str_remove(TableReplace5$Phoneme, "Phoneme")
TableReplace5$Type <-
  mapvalues(TableReplace5$Phoneme, from = fromtype, to = totype2)
TableReplace5$Mapping <-
  mapvalues(TableReplace5$Phoneme, from = fromtype, to = totype1)
TableReplace5$Phoneme <-
  mapvalues(TableReplace5$Phoneme, from = fromlist, to = tolist)

TableReplace5 %>%
  dplyr::select(Phoneme, Type:Mapping, 2:5) %>%
  knitr::kable(caption = "Westbury - Shape ANOVA Table") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Estimate | SE | t | p |
|---|---|---|---|---|---|---|
| (Intercept) | (Intercept) | (Intercept) | 0.293 | 0.099 | 2.964 | 0.003 |
| ɛ | Vowel | Conventional | -0.811 | 0.113 | -7.176 | <0.001 |
| ɪ | Vowel | Conventional | -0.954 | 0.105 | -9.051 | <0.001 |
| i | Vowel | Iconic | -0.192 | 0.108 | -1.778 | 0.075 |
| m | Consonant | Iconic | 0.130 | 0.108 | 1.205 | 0.228 |
| n | Consonant | Iconic | -0.200 | 0.104 | -1.927 | 0.054 |
| p | Consonant | Iconic | -0.044 | 0.113 | -0.387 | 0.699 |
| ʃ | Consonant | Conventional | 0.049 | 0.132 | 0.370 | 0.711 |
| t | Consonant | Iconic | -1.556 | 0.105 | -14.797 | <0.001 |
| θ | Consonant | Conventional | -1.291 | 0.151 | -8.566 | <0.001 |
| ʌ | Vowel | Conventional | -0.257 | 0.107 | -2.396 | 0.017 |
#
# Size model: size score regressed on phoneme identity, using only the rows
# where the word contains the phoneme.
WestModel.Size <- glm(
  zLARGE.zSMALL ~ Phoneme,
  data = subset(WestTrimLong, Contains == 1)
)

TableReplace6 <-
  summary(WestModel.Size)$coefficients %>%
  data.frame() %>%
  tibble::rownames_to_column(var = "FixedEffect") %>%
  `colnames<-`(c("Phoneme", "Estimate", "SE", "t", "p")) %>%
  # Label small p-values on the raw values, before rounding, so that nothing
  # at or above 0.001 is reported as "<0.001" and nothing below it is rounded
  # up to 0.001. (Replaces the deprecated mutate_at(..., funs(...)) idiom.)
  mutate(p = ifelse(p < 0.001, "<0.001", sprintf("%.3f", p))) %>%
  mutate_if(is.numeric, round, digits = 3)

# Coefficient names come back with the term name prepended ("Phoneme<code>");
# strip that prefix, then re-attach the Type / Mapping labels and IPA symbols.
TableReplace6$Phoneme <- str_remove(TableReplace6$Phoneme, "Phoneme")
TableReplace6$Type <-
  mapvalues(TableReplace6$Phoneme, from = fromtype, to = totype2)
TableReplace6$Mapping <-
  mapvalues(TableReplace6$Phoneme, from = fromtype, to = totype1)
TableReplace6$Phoneme <-
  mapvalues(TableReplace6$Phoneme, from = fromlist, to = tolist)

# The caption previously read "Shape ANOVA Table" (copy-paste from the shape
# model); this table reports the size model.
TableReplace6 %>%
  dplyr::select(Phoneme, Type:Mapping, 2:5) %>%
  knitr::kable(caption = "Westbury - Size ANOVA Table") %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type | Mapping | Estimate | SE | t | p |
|---|---|---|---|---|---|---|
| (Intercept) | (Intercept) | (Intercept) | 0.370 | 0.096 | 3.845 | <0.001 |
| ɛ | Vowel | Conventional | -0.972 | 0.110 | -8.834 | <0.001 |
| ɪ | Vowel | Conventional | -0.648 | 0.103 | -6.315 | <0.001 |
| i | Vowel | Iconic | -0.554 | 0.105 | -5.283 | <0.001 |
| m | Consonant | Iconic | -0.547 | 0.105 | -5.217 | <0.001 |
| n | Consonant | Iconic | -0.330 | 0.101 | -3.268 | 0.001 |
| p | Consonant | Iconic | -0.935 | 0.110 | -8.535 | <0.001 |
| ʃ | Consonant | Conventional | 0.377 | 0.128 | 2.943 | 0.003 |
| t | Consonant | Iconic | -0.661 | 0.102 | -6.464 | <0.001 |
| θ | Consonant | Conventional | 0.359 | 0.147 | 2.446 | 0.014 |
| ʌ | Vowel | Conventional | -0.674 | 0.104 | -6.458 | <0.001 |
##Phonemes vs. Chance for Shape
# Estimated marginal mean of the shape score for each phoneme, with a test of
# each mean.
# NOTE(review): `adjust = "tukey"` is kept from the original call, but the
# p-values in the rendered table look unadjusted - confirm whether a
# multiplicity adjustment is intended and, if so, where it should be applied.
WestModel.Shape.vChance <- test(emmeans(
  WestModel.Shape,
  ~Phoneme,
  adjust = "tukey"
))

# Type and Mapping are derived from the raw column codes, so the Phoneme
# column is converted to IPA symbols last.
WestModel.Shape.vChance$Type <-
  mapvalues(WestModel.Shape.vChance$Phoneme, from = fromtype, to = totype2)
WestModel.Shape.vChance$Mapping <-
  mapvalues(WestModel.Shape.vChance$Phoneme, from = fromtype, to = totype1)
WestModel.Shape.vChance$Phoneme <-
  mapvalues(WestModel.Shape.vChance$Phoneme, from = fromlist, to = tolist)

WestModel.Shape.vChance %<>%
  dplyr::select(1, 7:8, 2:6) %>%
  `colnames<-`(
    c("Phoneme", "Type", "Mapping", "em mean", "SE", "df", "z ratio", "p")
  ) %>%
  # Threshold p on the raw values BEFORE rounding. Rounding to two decimals
  # first turned e.g. p = 0.003 into 0 and then reported it as "<0.001".
  mutate(p = ifelse(p < 0.001, "<0.001", sprintf("%.3f", p))) %>%
  mutate_if(is.numeric, round, 2)

# Only the test statistic and p-value are taken from the emmeans table, so the
# merge does not duplicate Type / Mapping into Type.x / Type.y columns.
merge(
  Westbury.Summary,
  as.data.frame(WestModel.Shape.vChance)[, c("Phoneme", "z ratio", "p")],
  by = "Phoneme"
) %>%
  dplyr::select(
    Phoneme, Type, Mapping, `Round-Mean`, `Round-SE`, `z ratio`, p
  ) %>%
  arrange(`Round-Mean`) %>%
  # Match the three-decimal display used by the other summary tables
  mutate_if(is.numeric, round, digits = 3) %>%
  knitr::kable(caption = 'Westbury- Phoneme-Shape Mappings vs. Chance') %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type.x | Mapping.x | Round-Mean | Round-SE | z ratio | p |
|---|---|---|---|---|---|---|
| t | Consonant | Iconic | -1.2635224 | 0.0304130 | -34.96 | <0.001 |
| θ | Consonant | Conventional | -0.9978556 | 0.0774796 | -8.77 | <0.001 |
| ɪ | Vowel | Conventional | -0.6611889 | 0.0395652 | -17.98 | <0.001 |
| ɛ | Vowel | Conventional | -0.5182485 | 0.0624254 | -9.43 | <0.001 |
| ʌ | Vowel | Conventional | 0.0358692 | 0.0430849 | 0.86 | 0.39 |
| n | Consonant | Iconic | 0.0930900 | 0.0313776 | 2.97 | <0.001 |
| i | Vowel | Iconic | 0.1012768 | 0.0451630 | 2.36 | 0.02 |
| p | Consonant | Iconic | 0.2491937 | 0.0547132 | 4.60 | <0.001 |
| ɒ | Vowel | Iconic | 0.2927984 | 0.0955280 | 2.96 | <0.001 |
| ʃ | Consonant | Conventional | 0.3414687 | 0.0845604 | 3.93 | <0.001 |
| m | Consonant | Iconic | 0.4227083 | 0.0417356 | 9.79 | <0.001 |
# Estimated marginal mean of the size score for each phoneme, with a test of
# each mean.
# NOTE(review): `adjust = "tukey"` is kept from the original call, but the
# p-values in the rendered table look unadjusted - confirm whether a
# multiplicity adjustment is intended and, if so, where it should be applied.
WestModel.Size.vChance <- test(emmeans(
  WestModel.Size,
  ~Phoneme,
  adjust = "tukey"
))

# Type and Mapping are derived from the raw column codes, so the Phoneme
# column is converted to IPA symbols last.
WestModel.Size.vChance$Type <-
  mapvalues(WestModel.Size.vChance$Phoneme, from = fromtype, to = totype2)
WestModel.Size.vChance$Mapping <-
  mapvalues(WestModel.Size.vChance$Phoneme, from = fromtype, to = totype1)
WestModel.Size.vChance$Phoneme <-
  mapvalues(WestModel.Size.vChance$Phoneme, from = fromlist, to = tolist)

WestModel.Size.vChance %<>%
  dplyr::select(1, 7:8, 2:6) %>%
  `colnames<-`(
    c("Phoneme", "Type", "Mapping", "em mean", "SE", "df", "z ratio", "p")
  ) %>%
  # Threshold p on the raw values BEFORE rounding. Rounding to two decimals
  # first turned e.g. p = 0.003 into 0 and then reported it as "<0.001".
  mutate(p = ifelse(p < 0.001, "<0.001", sprintf("%.3f", p))) %>%
  mutate_if(is.numeric, round, 2)

# Only the test statistic and p-value are taken from the emmeans table, so the
# merge does not duplicate Type / Mapping into Type.x / Type.y columns.
merge(
  Westbury.Summary,
  as.data.frame(WestModel.Size.vChance)[, c("Phoneme", "z ratio", "p")],
  by = "Phoneme"
) %>%
  dplyr::select(
    Phoneme, Type, Mapping, `Large-Mean`, `Large-SE`, `z ratio`, p
  ) %>%
  arrange(`Large-Mean`) %>%
  # Match the three-decimal display used by the other summary tables
  mutate_if(is.numeric, round, digits = 3) %>%
  knitr::kable(caption = 'Westbury- Phoneme-Size Mappings vs. Chance') %>%
  kable_styling(full_width = FALSE)
| Phoneme | Type.x | Mapping.x | Large-Mean | Large-SE | z ratio | p |
|---|---|---|---|---|---|---|
| ɛ | Vowel | Conventional | -0.6020379 | 0.0512162 | -11.26 | <0.001 |
| p | Consonant | Iconic | -0.5658686 | 0.0490263 | -10.74 | <0.001 |
| ʌ | Vowel | Conventional | -0.3043695 | 0.0403417 | -7.49 | <0.001 |
| t | Consonant | Iconic | -0.2919475 | 0.0344894 | -8.30 | <0.001 |
| ɪ | Vowel | Conventional | -0.2781386 | 0.0375739 | -7.77 | <0.001 |
| i | Vowel | Iconic | -0.1841797 | 0.0417106 | -4.41 | <0.001 |
| m | Consonant | Iconic | -0.1777205 | 0.0424066 | -4.23 | <0.001 |
| n | Consonant | Iconic | 0.0399707 | 0.0315321 | 1.31 | 0.19 |
| ɒ | Vowel | Iconic | 0.3695294 | 0.0963275 | 3.84 | <0.001 |
| θ | Consonant | Conventional | 0.7281002 | 0.1015028 | 6.58 | <0.001 |
| ʃ | Consonant | Conventional | 0.7462178 | 0.0745745 | 8.83 | <0.001 |