Westbury Exploration

# Introduction

In this document we explore data from Westbury et al. (2018) to examine the “neutrality” of the conventional consonants and vowels used in Nielsen et al. (this manuscript).

This raw data was obtained from David Sidhu, one of the authors of Westbury et al. (2018) for us to explore. Thanks to David, Chris, and those others involved.

First we’ll read in the data and load some libraries (note that we load only the fraction of the total data from that experiment that is most germane to our purposes).

library(lme4)
library(plyr)
library(ggplot2)
library(afex)
library(emmeans)
library(ggthemes)
library(tidyverse)
library(kableExtra)
library(Hmisc)
library(binom)
library(Rmisc)
library(magick)
library(webshot)
library(magrittr)
library(multcomp)

#' Complete ggplot2 theme used for the figures in this analysis
#'
#' Builds a full theme (`complete = TRUE`) with a white panel and plot
#' background, a thin dark panel border, light grey grid lines and unfilled
#' facet strips. Margins and spacings are derived from `base_size`.
#'
#' @param base_size Base font size; half of it (`half_line`) is the unit used
#'   for margins, tick length and panel spacing.
#' @param base_family Base font family passed to the root `text` element.
#' @return The theme object produced by `theme()`, to be added to a plot
#'   with `+`.
theme_alan <- function(base_size = 12, base_family = "") {
  # Every margin and spacing below is expressed as a multiple of half_line.
  half_line <- base_size / 2

  # NOTE(review): recent ggplot2 releases deprecate `size` for line/rect
  # elements (in favour of `linewidth`) and the `legend.*.align` settings;
  # they are kept here so the theme still works with the older ggplot2 this
  # script was written against -- confirm before modernising.
  theme(
    # Root elements that all other elements inherit from
    line = element_line(colour = "black", size = 0.5, linetype = 1, lineend = "butt"),
    rect = element_rect(fill = "white",
                        colour = "black", size = 0.5, linetype = 1),
    text = element_text(family = base_family, face = "plain", colour = "black",
                        size = base_size, lineheight = 0.9, hjust = 0.5, vjust = 0.5,
                        angle = 0, margin = margin(), debug = FALSE),

    # Axes
    axis.line = element_blank(),
    axis.line.x = NULL,
    axis.line.y = NULL,
    axis.text = element_text(size = rel(0.8), colour = "grey30"),
    axis.text.x = element_text(margin = margin(t = 0.8 * half_line / 2), vjust = 1),
    axis.text.x.top = element_text(margin = margin(b = 0.8 * half_line / 2), vjust = 0),
    axis.text.y = element_text(margin = margin(r = 0.8 * half_line / 2), hjust = 1),
    axis.text.y.right = element_text(margin = margin(l = 0.8 * half_line / 2), hjust = 0),
    axis.ticks = element_line(colour = "grey20"),
    axis.ticks.length = unit(half_line / 2, "pt"),
    axis.title.x = element_text(margin = margin(t = half_line), vjust = 1),
    axis.title.x.top = element_text(margin = margin(b = half_line), vjust = 0),
    axis.title.y = element_text(angle = 90, margin = margin(r = half_line), vjust = 1),
    axis.title.y.right = element_text(angle = -90, margin = margin(l = half_line), vjust = 0),

    # Legend
    legend.background = element_rect(colour = NA),
    legend.spacing = unit(0.4, "cm"),
    legend.spacing.x = NULL,
    legend.spacing.y = NULL,
    legend.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"),
    legend.key = element_rect(fill = "white", colour = NA),
    legend.key.size = unit(1.2, "lines"),
    legend.key.height = NULL,
    legend.key.width = NULL,
    legend.text = element_text(size = rel(0.8)),
    legend.text.align = NULL,
    legend.title = element_text(hjust = 0),
    legend.title.align = NULL,
    legend.position = "right",
    legend.direction = NULL,
    legend.justification = "center",
    legend.box = NULL,
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.background = element_blank(),
    legend.box.spacing = unit(0.4, "cm"),

    # Panels
    panel.background = element_rect(fill = "white", colour = NA),
    panel.border = element_rect(fill = NA, colour = "grey20"),
    panel.grid.major = element_line(colour = "grey92"),
    panel.grid.minor = element_line(colour = "grey92", size = 0.25),
    panel.spacing = unit(half_line, "pt"),
    panel.spacing.x = NULL,
    panel.spacing.y = NULL,
    panel.ontop = FALSE,

    # Facet strips (the fill and colour are the string "NA", as in the
    # original definition)
    strip.background = element_rect(fill = "NA", colour = "NA"),
    strip.text = element_text(colour = "grey10", size = rel(0.8)),
    strip.text.x = element_text(margin = margin(t = half_line, b = half_line)),
    strip.text.y = element_text(angle = 0, margin = margin(l = half_line, r = half_line)),
    strip.placement = "inside",
    strip.placement.x = NULL,
    strip.placement.y = NULL,
    strip.switch.pad.grid = unit(0.1, "cm"),
    strip.switch.pad.wrap = unit(0.1, "cm"),

    # Whole-plot elements
    plot.background = element_rect(colour = "white"),
    plot.title = element_text(size = rel(1.2), hjust = 0, vjust = 1, margin = margin(b = half_line * 1.2)),
    plot.subtitle = element_text(size = rel(0.9), hjust = 0, vjust = 1, margin = margin(b = half_line * 0.9)),
    plot.caption = element_text(size = rel(0.9), hjust = 1, vjust = 1, margin = margin(t = half_line * 0.9)),
    plot.margin = margin(half_line, half_line, half_line, half_line),

    complete = TRUE)
}

# Read the raw Westbury et al. (2018) data.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where that folder exists -- consider a project-relative path.
WestData <- read.csv("C:/Users/Alan Nielsen/Google Drive/Publications/Motivated vs Conventional Systematicity/Data/WestburyDataFull.csv")

# Cutting down the dataset to only words that use our phonemes of interest.
# Key: our label = unicode escape = IPA symbol = column code in the data file
# Vowels
# I   = "\u026A" = ɪ = IH
# UH  = "\u028C" = ʌ = UX
# EH  = "\u025B" = ɛ = EH
# UGH = "\u028A" = ʊ = UH BUT NOT FOUND IN DATA
# ee  = "i"      = i = IY
# ew  = "\u028F" = ʏ = NOT USED IN WESTBURY
# ah  = "\u0251" = ɑ = NOT USED IN WESTBURY
# aw  = "\u0252" = ɒ = AA
# Consonants
# th  - "\u03B8" = θ = T
# thh - "\u00F0" = ð = xxxD - BUT NOT FOUND IN DATA
# sh  - "\u0283" = ʃ = S
# dz  - "\u02A4" = ʤ = J - BUT NOT FOUND IN DATA
# p,t,m,n - all standard

# Lookup vectors for mapvalues(). Each from/to pair is matched by position.
# The code for ð is "xxxD" (the column name selected below), not "D"; with
# "D" the ð mapping could never match a pivoted column name.
fromlist <- c("IH", "UX", "EH", "IY", "AA", "T", "xxxD", "S")
tolist <- c("\u026A", "\u028C", "\u025B", "i", "\u0252", "\u03B8", "\u00F0", "\u0283")
fromtype <- c(fromlist, "p", "t", "m", "n")
totype1 <- c("Conventional", "Conventional", "Conventional", "Iconic", "Iconic", "Conventional",
             "Conventional", "Conventional", rep("Iconic", 4))
totype2 <- c(rep("Vowel", 5), rep("Consonant", 7))

# Guard against the from/to vectors drifting out of step when edited.
stopifnot(
  length(fromlist) == length(tolist),
  length(fromtype) == length(totype1),
  length(fromtype) == length(totype2)
)

# We're only going to use the zROUND.zSHARP and zLARGE.zSMALL columns of the
# actual data here, so cut the data frame down to those plus the phoneme columns.
WestTrim <- subset(WestData, select = c(PHONO,
                                        p, t, m, n, T, xxxD, S, # CONSONANTS
                                        IH, UX, EH, IY, AA, # VOWELS
                                        zLARGE.zSMALL, zROUND.zSHARP)) # METRICS

# Drop words that don't have any of our phonemes of interest. The phoneme
# columns are picked by name (fromtype holds exactly the 12 phoneme columns
# selected above) rather than by position. which() drops rows whose total is
# NA, as subset() did.
WestTrim <- WestTrim[which(rowSums(WestTrim[fromtype]) != 0), , drop = FALSE]

We’ve loaded in the data now- the full dataset is very large and includes many associations between the tested words and semantic dimensions, but what we’re interested in are Shape (Jagged vs. Curvy - Exp 1 and 2) and Size (Large vs. Small- Exp)

As the quickest way of exploring this data, we’ve taken a single column for each of these.

For shape, this is the difference between the Z score for Curved and the z Score for Jagged associations - so a high positive score in this column means the word was highly associated with curviness, while a low negative score means that the word was highly associated with jaggedness. Scores near zero reflect words associated with neither curviness nor jaggedness.

For Size, the data is similar- a high positive score means the word was associated with largeness, and a negative score means the word was associated with smallness.

The actual models reported in Westbury et al. use only significant model predictors- thus individual phonemes could not be extracted from the results of the paper. So, for the purposes of this analysis, we’re simply going to use averages of these z-score differences. For example if we want to know whether /t/ is jagged or curvy, we will take the average ZScore difference for Shape in all words containing /t/. Thus, the approach we’re taking here does not make any assumptions about statistical significance.

First we’ll take a look at Experiment 1

## Experiment 1

In Experiment 1, we compared the learnability of artificial lexica that were either:

- Iconic (m, n = curvy; t, p = jagged)
- Counter-Iconic (m, n = jagged; t, p = curvy)
- Conventional (θ, ð = jagged; ʃ, ʒ = curved (or vice-versa, counterbalanced between participants))
- Mixed (m, θ = jagged; p, ʃ = curvy (this is one example of a possible configuration, of which there were many))

For our purposes, the important consideration is just the relative ranking of the full 8 consonants for Shape, so let’s calculate that

We should note before starting that Westbury et al. did not test all of the consonants used in this experiment; thus we will only be testing the consonants p, t, m, n, θ, ʃ, ʒ (that is, we will not be testing ð). In the notation of Westbury et al.’s data file, these phonemes are represented as p, t, m, n, T, S, Z, and the vowels used in Experiment 1 (ɛ and ʌ) are represented as EH and UX respectively, with UH not actually being included in the data.

Consonants - Shape

# Reshape to long format: the phoneme columns p..AA are stacked into a
# `Phoneme` name column and a `Contains` value column.
WestTrimLong <- pivot_longer(
  WestTrim,
  cols = p:AA,
  names_to = "Phoneme",
  values_to = "Contains"
)

# Per-phoneme summaries from summarySE(), using only rows where Contains is 1
Westbury.Size <- summarySE(
  subset(WestTrimLong, Contains == "1"),
  measurevar = "zLARGE.zSMALL",
  groupvars = "Phoneme"
)

Westbury.Shape <- summarySE(
  subset(WestTrimLong, Contains == "1"),
  measurevar = "zROUND.zSHARP",
  groupvars = "Phoneme"
)

# Put the size and shape means/SEs side by side. The two summaries are
# combined by row position, with the phoneme label taken from the size one.
Westbury.Summary <- data.frame(
  "Phoneme" = Westbury.Size$Phoneme,
  "Large-Mean" = Westbury.Size$zLARGE.zSMALL,
  "Large-SE" = Westbury.Size$se,
  "Round-Mean" = Westbury.Shape$zROUND.zSHARP,
  "Round-SE" = Westbury.Shape$se,
  check.names = FALSE
)

# Derive Type and Mapping from the raw phoneme codes first, then swap the
# codes themselves for IPA symbols.
Westbury.Summary$Type <- mapvalues(Westbury.Summary$Phoneme, from = fromtype, to = totype2)
Westbury.Summary$Mapping <- mapvalues(Westbury.Summary$Phoneme, from = fromtype, to = totype1)
Westbury.Summary$Phoneme <- mapvalues(Westbury.Summary$Phoneme, from = fromlist, to = tolist)

# Overall summary table
Westbury.Summary %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type, Mapping,
                `Large-Mean`, `Large-SE`, `Round-Mean`, `Round-SE`) %>%
  knitr::kable(caption = "Westbury - Phonemes Table") %>%
  kable_styling(full_width = FALSE)

Westbury - Phonemes Table
Phoneme	Type	Mapping	Large-Mean	Large-SE	Round-Mean	Round-SE
ɒ	Vowel	Iconic	0.370	0.096	0.293	0.096
ɛ	Vowel	Conventional	-0.602	0.051	-0.518	0.062
ɪ	Vowel	Conventional	-0.278	0.038	-0.661	0.040
i	Vowel	Iconic	-0.184	0.042	0.101	0.045
m	Consonant	Iconic	-0.178	0.042	0.423	0.042
n	Consonant	Iconic	0.040	0.032	0.093	0.031
p	Consonant	Iconic	-0.566	0.049	0.249	0.055
ʃ	Consonant	Conventional	0.746	0.075	0.341	0.085
t	Consonant	Iconic	-0.292	0.034	-1.264	0.030
θ	Consonant	Conventional	0.728	0.102	-0.998	0.077
ʌ	Vowel	Conventional	-0.304	0.040	0.036	0.043

## Consonants x Shape
# Consonant rows only, rounded to 3 decimals, sorted by ascending Round-Mean
Westbury.Summary.ConsShape <-
  Westbury.Summary %>%
  filter(Type == "Consonant") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type, Mapping, `Round-Mean`, `Round-SE`) %>%
  arrange(`Round-Mean`)

kable_styling(
  knitr::kable(Westbury.Summary.ConsShape, caption = "Westbury - Consonants x Shape"),
  full_width = FALSE
)

Westbury - Consonants x Shape
Phoneme	Type	Mapping	Round-Mean	Round-SE
t	Consonant	Iconic	-1.264	0.030
θ	Consonant	Conventional	-0.998	0.077
n	Consonant	Iconic	0.093	0.031
p	Consonant	Iconic	0.249	0.055
ʃ	Consonant	Conventional	0.341	0.085
m	Consonant	Iconic	0.423	0.042

## Vowels x Size
# Vowel rows only, rounded to 3 decimals, sorted by ascending Large-Mean
Westbury.Summary %>%
  filter(Type == "Vowel") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type, Mapping, `Large-Mean`, `Large-SE`) %>%
  arrange(`Large-Mean`) %>%
  knitr::kable(caption = "Westbury - Vowels x Size") %>%
  kable_styling(full_width = FALSE)

Westbury - Vowels x Size
Phoneme	Type	Mapping	Large-Mean	Large-SE
ɛ	Vowel	Conventional	-0.602	0.051
ʌ	Vowel	Conventional	-0.304	0.040
ɪ	Vowel	Conventional	-0.278	0.038
i	Vowel	Iconic	-0.184	0.042
ɒ	Vowel	Iconic	0.370	0.096

## Consonants x Size
# Consonant rows only, rounded to 3 decimals, sorted by ascending Large-Mean
Westbury.Summary %>%
  filter(Type == "Consonant") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type, Mapping, `Large-Mean`, `Large-SE`) %>%
  arrange(`Large-Mean`) %>%
  knitr::kable(caption = "Westbury - Consonants x Size") %>%
  kable_styling(full_width = FALSE)

Westbury - Consonants x Size
Phoneme	Type	Mapping	Large-Mean	Large-SE
p	Consonant	Iconic	-0.566	0.049
t	Consonant	Iconic	-0.292	0.034
m	Consonant	Iconic	-0.178	0.042
n	Consonant	Iconic	0.040	0.032
θ	Consonant	Conventional	0.728	0.102
ʃ	Consonant	Conventional	0.746	0.075

## Vowels x Shape
# Vowel rows only, rounded to 3 decimals, sorted by ascending Round-Mean
Westbury.Summary %>%
  filter(Type == "Vowel") %>%
  mutate_if(is.numeric, round, digits = 3) %>%
  dplyr::select(Phoneme, Type, Mapping, `Round-Mean`, `Round-SE`) %>%
  arrange(`Round-Mean`) %>%
  knitr::kable(caption = "Westbury - Vowels x Shape") %>%
  kable_styling(full_width = FALSE)

Westbury - Vowels x Shape
Phoneme	Type	Mapping	Round-Mean	Round-SE
ɪ	Vowel	Conventional	-0.661	0.040
ɛ	Vowel	Conventional	-0.518	0.062
ʌ	Vowel	Conventional	0.036	0.043
i	Vowel	Iconic	0.101	0.045
ɒ	Vowel	Iconic	0.293	0.096

library(lme4)
# No need for mixed models or anything fancy here. A simple glm will do it.

# Shape model: zROUND.zSHARP as a function of phoneme, fitted to the rows
# where Contains is 1. glm() is called with its default family.
WestTrimLong$Phoneme <- factor(WestTrimLong$Phoneme)
WestModel.Shape <- glm(zROUND.zSHARP ~ Phoneme, data = subset(WestTrimLong, Contains == 1))

# Coefficient table. The p column is formatted from the unrounded values
# *before* the numeric columns are rounded, so a p just under 0.001 is shown
# as "<0.001" rather than being rounded up to 0.001 first. Plain mutate()
# replaces the deprecated mutate_at(..., funs(...)).
TableReplace5 <-
  summary(WestModel.Shape)$coefficients %>%
  data.frame() %>%
  tibble::rownames_to_column(var = "FixedEffect") %>%
  `colnames<-`(c("Phoneme", "Estimate", "SE", "t", "p")) %>%
  dplyr::mutate(p = ifelse(p < 0.001, "<0.001", as.character(round(p, 3)))) %>%
  mutate_if(is.numeric, round, digits = 3)

# Reformat the table: glm prefixes each coefficient name with the predictor
# name ("Phoneme"), so strip that to recover the bare phoneme codes.
TableReplace5$Phoneme <- str_remove(TableReplace5$Phoneme, "Phoneme")

# Codes with no entry in the lookup vectors (e.g. "(Intercept)") are left
# unchanged by mapvalues().
TableReplace5$Type <- mapvalues(TableReplace5$Phoneme, from = fromtype, to = totype2)
TableReplace5$Mapping <- mapvalues(TableReplace5$Phoneme, from = fromtype, to = totype1)
TableReplace5$Phoneme <- mapvalues(TableReplace5$Phoneme, from = fromlist, to = tolist)

TableReplace5 %>%
  dplyr::select(Phoneme, Type:Mapping, 2:5) %>%
  knitr::kable(caption = "Westbury - Shape ANOVA Table") %>%
  kable_styling(full_width = FALSE)

Westbury - Shape ANOVA Table
Phoneme	Type	Mapping	Estimate	SE	t	p
(Intercept)	(Intercept)	(Intercept)	0.293	0.099	2.964	0.003
ɛ	Vowel	Conventional	-0.811	0.113	-7.176	<0.001
ɪ	Vowel	Conventional	-0.954	0.105	-9.051	<0.001
i	Vowel	Iconic	-0.192	0.108	-1.778	0.075
m	Consonant	Iconic	0.130	0.108	1.205	0.228
n	Consonant	Iconic	-0.200	0.104	-1.927	0.054
p	Consonant	Iconic	-0.044	0.113	-0.387	0.699
ʃ	Consonant	Conventional	0.049	0.132	0.370	0.711
t	Consonant	Iconic	-1.556	0.105	-14.797	<0.001
θ	Consonant	Conventional	-1.291	0.151	-8.566	<0.001
ʌ	Vowel	Conventional	-0.257	0.107	-2.396	0.017

#
# Size model: zLARGE.zSMALL as a function of phoneme, fitted to the rows
# where Contains is 1. glm() is called with its default family.
WestModel.Size <- glm(zLARGE.zSMALL ~ Phoneme, data = subset(WestTrimLong, Contains == 1))

# Coefficient table. The p column is formatted from the unrounded values
# *before* the numeric columns are rounded, so a p just under 0.001 is shown
# as "<0.001" rather than being rounded up to 0.001 first. Plain mutate()
# replaces the deprecated mutate_at(..., funs(...)).
TableReplace6 <-
  summary(WestModel.Size)$coefficients %>%
  data.frame() %>%
  tibble::rownames_to_column(var = "FixedEffect") %>%
  `colnames<-`(c("Phoneme", "Estimate", "SE", "t", "p")) %>%
  dplyr::mutate(p = ifelse(p < 0.001, "<0.001", as.character(round(p, 3)))) %>%
  mutate_if(is.numeric, round, digits = 3)

# Reformat the table: glm prefixes each coefficient name with the predictor
# name ("Phoneme"), so strip that to recover the bare phoneme codes.
TableReplace6$Phoneme <- str_remove(TableReplace6$Phoneme, "Phoneme")

# Codes with no entry in the lookup vectors (e.g. "(Intercept)") are left
# unchanged by mapvalues().
TableReplace6$Type <- mapvalues(TableReplace6$Phoneme, from = fromtype, to = totype2)
TableReplace6$Mapping <- mapvalues(TableReplace6$Phoneme, from = fromtype, to = totype1)
TableReplace6$Phoneme <- mapvalues(TableReplace6$Phoneme, from = fromlist, to = tolist)

# The caption previously said "Shape" (copied from the shape model block);
# this table reports the size model.
TableReplace6 %>%
  dplyr::select(Phoneme, Type:Mapping, 2:5) %>%
  knitr::kable(caption = "Westbury - Size ANOVA Table") %>%
  kable_styling(full_width = FALSE)

Westbury - Size ANOVA Table
Phoneme	Type	Mapping	Estimate	SE	t	p
(Intercept)	(Intercept)	(Intercept)	0.370	0.096	3.845	<0.001
ɛ	Vowel	Conventional	-0.972	0.110	-8.834	<0.001
ɪ	Vowel	Conventional	-0.648	0.103	-6.315	<0.001
i	Vowel	Iconic	-0.554	0.105	-5.283	<0.001
m	Consonant	Iconic	-0.547	0.105	-5.217	<0.001
n	Consonant	Iconic	-0.330	0.101	-3.268	0.001
p	Consonant	Iconic	-0.935	0.110	-8.535	<0.001
ʃ	Consonant	Conventional	0.377	0.128	2.943	0.003
t	Consonant	Iconic	-0.661	0.102	-6.464	<0.001
θ	Consonant	Conventional	0.359	0.147	2.446	0.014
ʌ	Vowel	Conventional	-0.674	0.104	-6.458	<0.001

## Phonemes vs. Chance for Shape
# Test each phoneme's estimated marginal mean from the shape model.
# NOTE(review): `adjust = "tukey"` is a pairwise-comparison adjustment and
# these are one-sample tests -- confirm which adjustment (if any) emmeans
# actually applies here.
WestModel.Shape.vChance <- test(emmeans(WestModel.Shape,
                                        ~Phoneme,
                                        adjust = "tukey"))

# Derive Type and Mapping from the raw phoneme codes first, then swap the
# codes themselves for IPA symbols.
WestModel.Shape.vChance$Type <- mapvalues(WestModel.Shape.vChance$Phoneme, from = fromtype, to = totype2)
WestModel.Shape.vChance$Mapping <- mapvalues(WestModel.Shape.vChance$Phoneme, from = fromtype, to = totype1)
WestModel.Shape.vChance$Phoneme <- mapvalues(WestModel.Shape.vChance$Phoneme, from = fromlist, to = tolist)

# Reorder and rename the columns, then format. The p column is formatted from
# the unrounded values *before* rounding: previously p was rounded to two
# decimals first, so every p below 0.005 became 0 and was reported as
# "<0.001". Plain mutate() replaces the deprecated mutate_at(..., funs(...)).
# NOTE(review): the test-statistic column is labelled "z ratio" by hand --
# confirm it matches the statistic emmeans reports for this model.
WestModel.Shape.vChance %<>%
  dplyr::select(1, 7:8, 2:6) %>%
  `colnames<-`(c("Phoneme", "Type", "Mapping", "em mean", "SE", "df", "z ratio", "p")) %>%
  dplyr::mutate(p = ifelse(p < 0.001, "<0.001", as.character(round(p, 3)))) %>%
  mutate_if(is.numeric, round, 2)

# Join the descriptive summary with the test results and show, per phoneme,
# the shape mean/SE alongside the test statistic and p value.
merge(Westbury.Summary, WestModel.Shape.vChance, by = "Phoneme") %>%
  dplyr::select(1, 6:7, 4:5, 13:14) %>%
  arrange(`Round-Mean`) %>%
  knitr::kable(caption = 'Westbury- Phoneme-Shape Mappings vs. Chance') %>%
  kable_styling(full_width = FALSE)

Westbury- Phoneme-Shape Mappings vs. Chance
Phoneme	Type.x	Mapping.x	Round-Mean	Round-SE	z ratio	p
t	Consonant	Iconic	-1.2635224	0.0304130	-34.96	<0.001
θ	Consonant	Conventional	-0.9978556	0.0774796	-8.77	<0.001
ɪ	Vowel	Conventional	-0.6611889	0.0395652	-17.98	<0.001
ɛ	Vowel	Conventional	-0.5182485	0.0624254	-9.43	<0.001
ʌ	Vowel	Conventional	0.0358692	0.0430849	0.86	0.39
n	Consonant	Iconic	0.0930900	0.0313776	2.97	<0.001
i	Vowel	Iconic	0.1012768	0.0451630	2.36	0.02
p	Consonant	Iconic	0.2491937	0.0547132	4.60	<0.001
ɒ	Vowel	Iconic	0.2927984	0.0955280	2.96	<0.001
ʃ	Consonant	Conventional	0.3414687	0.0845604	3.93	<0.001
m	Consonant	Iconic	0.4227083	0.0417356	9.79	<0.001

# Test each phoneme's estimated marginal mean from the size model.
# NOTE(review): `adjust = "tukey"` is a pairwise-comparison adjustment and
# these are one-sample tests -- confirm which adjustment (if any) emmeans
# actually applies here.
WestModel.Size.vChance <- test(emmeans(WestModel.Size,
                                       ~Phoneme,
                                       adjust = "tukey"))

# Derive Type and Mapping from the raw phoneme codes first, then swap the
# codes themselves for IPA symbols.
WestModel.Size.vChance$Type <- mapvalues(WestModel.Size.vChance$Phoneme, from = fromtype, to = totype2)
WestModel.Size.vChance$Mapping <- mapvalues(WestModel.Size.vChance$Phoneme, from = fromtype, to = totype1)
WestModel.Size.vChance$Phoneme <- mapvalues(WestModel.Size.vChance$Phoneme, from = fromlist, to = tolist)

# Reorder and rename the columns, then format. The p column is formatted from
# the unrounded values *before* rounding: previously p was rounded to two
# decimals first, so every p below 0.005 became 0 and was reported as
# "<0.001". Plain mutate() replaces the deprecated mutate_at(..., funs(...)).
# NOTE(review): the test-statistic column is labelled "z ratio" by hand --
# confirm it matches the statistic emmeans reports for this model.
WestModel.Size.vChance %<>%
  dplyr::select(1, 7:8, 2:6) %>%
  `colnames<-`(c("Phoneme", "Type", "Mapping", "em mean", "SE", "df", "z ratio", "p")) %>%
  dplyr::mutate(p = ifelse(p < 0.001, "<0.001", as.character(round(p, 3)))) %>%
  mutate_if(is.numeric, round, 2)

# Join the descriptive summary with the test results and show, per phoneme,
# the size mean/SE alongside the test statistic and p value.
merge(Westbury.Summary, WestModel.Size.vChance, by = "Phoneme") %>%
  dplyr::select(1, 6:7, 2:3, 13:14) %>%
  arrange(`Large-Mean`) %>%
  knitr::kable(caption = 'Westbury- Phoneme-Size Mappings vs. Chance') %>%
  kable_styling(full_width = FALSE)

Westbury- Phoneme-Size Mappings vs. Chance
Phoneme	Type.x	Mapping.x	Large-Mean	Large-SE	z ratio	p
ɛ	Vowel	Conventional	-0.6020379	0.0512162	-11.26	<0.001
p	Consonant	Iconic	-0.5658686	0.0490263	-10.74	<0.001
ʌ	Vowel	Conventional	-0.3043695	0.0403417	-7.49	<0.001
t	Consonant	Iconic	-0.2919475	0.0344894	-8.30	<0.001
ɪ	Vowel	Conventional	-0.2781386	0.0375739	-7.77	<0.001
i	Vowel	Iconic	-0.1841797	0.0417106	-4.41	<0.001
m	Consonant	Iconic	-0.1777205	0.0424066	-4.23	<0.001
n	Consonant	Iconic	0.0399707	0.0315321	1.31	0.19
ɒ	Vowel	Iconic	0.3695294	0.0963275	3.84	<0.001
θ	Consonant	Conventional	0.7281002	0.1015028	6.58	<0.001
ʃ	Consonant	Conventional	0.7462178	0.0745745	8.83	<0.001