Iconicity and Learning Analysis

This is an R Markdown document looking at publications and citations for articles about phonetic symbolism, sound symbolism, iconicity, and learning

Reading in the data

First we need to Read In and Sanitize the Data for Analysis

# Directory holding the CSV files for this analysis. To run on another
# machine (e.g. the F:/Publications/... copy), change only this one value.
mainPath <- 'D:/Google Drive/Publications/Iconicity and Learning Review/data'

# Raw Web of Science export: topic flag columns, bibliographic fields and the
# per-year columns X1900..X2018. The path is built from mainPath so the data
# directory is not spelled out a second time.
Citations <- read.csv(file.path(mainPath, "citations.csv"))

# Quick look at the first rows to confirm the import worked.
head(Citations)

##   Phonetic.Symbolism Sound.Symbolism Iconicity Learning RamaCite Bouba
## 1                  1              NA        NA       NA       NA    NA
## 2                  1              NA        NA       NA       NA    NA
## 3                  1              NA        NA       NA       NA    NA
## 4                  1              NA        NA       NA       NA    NA
## 5                  1              NA        NA       NA       NA    NA
## 6                 NA              NA         1       NA       NA    NA
##   Takete                                                       Title
## 1     NA                               A study in phonetic symbolism
## 2     NA                   Further experiments in phonetic symbolism
## 3     NA                  An accessory study of phonetic symbolism""
## 4     NA                         Phonetic Symbolism of Deaf Children
## 5     NA                         Phonetic Symbolism of Deaf Children
## 6     NA Sex, Age, and Iconicity as Factors in Projective Film Tests
##                              Authors Publication.Year Volume Issue
## 1                           Sapir, E             1929     12      
## 2                         Newman, SS             1933     45      
## 3              Bentley, M; Varon, EJ             1933     45      
## 4 Heider, Fritz; Heider, Grace Moore             1941     43     4
## 5 Heider, Fritz; Heider, Grace Moore             1941     43     3
## 6               McIntyre, Charles J.             1954     18     5
##   Beginning.Page Ending.Page              DOI Total.Citations
## 1            225         239 10.1037/h0070931             256
## 2             53          75  10.2307/1414186             114
## 3             76          86  10.2307/1414187              40
## 4            233         236                                0
## 5            165         168                                0
## 6            337         343                                0
##   Average.per.Year X1900 X1901 X1902 X1903 X1904 X1905 X1906 X1907 X1908
## 1             2.88     0     0     0     0     0     0     0     0     0
## 2             1.34     0     0     0     0     0     0     0     0     0
## 3             0.47     0     0     0     0     0     0     0     0     0
## 4             0.00     0     0     0     0     0     0     0     0     0
## 5             0.00     0     0     0     0     0     0     0     0     0
## 6             0.00     0     0     0     0     0     0     0     0     0
##   X1909 X1910 X1911 X1912 X1913 X1914 X1915 X1916 X1917 X1918 X1919 X1920
## 1     0     0     0     0     0     0     0     0     0     0     0     0
## 2     0     0     0     0     0     0     0     0     0     0     0     0
## 3     0     0     0     0     0     0     0     0     0     0     0     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1921 X1922 X1923 X1924 X1925 X1926 X1927 X1928 X1929 X1930 X1931 X1932
## 1     0     0     0     0     0     0     0     0     0     1     0     0
## 2     0     0     0     0     0     0     0     0     0     0     0     0
## 3     0     0     0     0     0     0     0     0     0     0     0     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1933 X1934 X1935 X1936 X1937 X1938 X1939 X1940 X1941 X1942 X1943 X1944
## 1     2     1     1     1     0     0     1     0     1     0     0     0
## 2     1     0     1     1     0     0     0     0     1     0     0     0
## 3     0     0     1     1     0     1     0     0     0     0     1     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1945 X1946 X1947 X1948 X1949 X1950 X1951 X1952 X1953 X1954 X1955 X1956
## 1     0     1     0     0     0     0     1     1     0     0     1     0
## 2     0     1     1     0     0     0     1     1     0     0     1     0
## 3     0     0     0     0     0     0     0     0     0     0     1     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1957 X1958 X1959 X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968
## 1     2     1     2     1     0     0     0     3     1     5     2     1
## 2     0     1     1     1     0     1     1     4     0     3     2     2
## 3     0     1     0     1     0     0     2     2     1     3     2     1
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1969 X1970 X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980
## 1     1     0     7     2     1     2     1     6     4     2     0     2
## 2     1     0     3     2     0     1     1     4     2     0     0     1
## 3     0     0     0     0     0     0     1     1     1     0     0     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1981 X1982 X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992
## 1     3     2     2     4     0     1     3     2     1     1     2     2
## 2     1     3     1     2     0     1     2     1     0     1     1     1
## 3     0     0     0     0     0     0     0     0     0     0     1     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X1993 X1994 X1995 X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004
## 1     1     2     0     1     1     2     1     0     3     1     4     2
## 2     0     0     0     0     0     2     1     0     1     1     3     0
## 3     0     0     0     0     0     1     1     0     1     0     2     0
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012 X2013 X2014 X2015 X2016
## 1     2     3     2     4     5    11    19    16    13    19    19    25
## 2     1     0     1     1     2     3     7     7     6     5     4    10
## 3     1     0     1     0     0     1     1     1     1     3     1     2
## 4     0     0     0     0     0     0     0     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     0     0     0     0     0     0     0     0     0     0
##   X2017 X2018  X
## 1    19     0 NA
## 2     5     0 NA
## 3     1     0 NA
## 4     0     0 NA
## 5     0     0 NA
## 6     0     0 NA

This data is coded with a few columns that show whether articles fall under various “topics” in the Web of Science database- the topics are “Phonetic_Symbolism”, “Sound Symbolism”, “Iconicity”, and “Learning”. Finally, there is an additional column “RamaCite” which is coded for whether an article cites Ramachandran and Hubbard (2001)

The data brought in raw is very messy- Web of Science has many duplicated/partially overlapping entries. Also each “topic” query was done separately, so articles that are listed under “phonetic symbolism” and under “sound symbolism” are listed twice- we need to clean this up.

First, we will clean this up by splitting the big dataframe by each of the columns mentioned above

# Lower-case every title so duplicate detection and title matching further
# down are not affected by differences in capitalisation.
Citations$Title <- tolower(Citations$Title)

# One data frame per flag column, keeping only rows whose flag equals 1.
# which() discards rows where the flag is NA.
CitationsPhon   <- Citations[which(Citations$Phonetic.Symbolism == 1), ]
CitationsSound  <- Citations[which(Citations$Sound.Symbolism == 1), ]
CitationsIcon   <- Citations[which(Citations$Iconicity == 1), ]
CitationsRama   <- Citations[which(Citations$RamaCite == 1), ]
CitationsLearn  <- Citations[which(Citations$Learning == 1), ]
CitationsBouba  <- Citations[which(Citations$Bouba == 1), ]
CitationsTakete <- Citations[which(Citations$Takete == 1), ]

Then we will remove duplicate titles in each of those dataframes

# Within each topic, keep only the first row for every (lower-cased) title.
CitationsPhonC   <- CitationsPhon[!duplicated(CitationsPhon$Title), ]
CitationsSoundC  <- CitationsSound[!duplicated(CitationsSound$Title), ]
CitationsIconC   <- CitationsIcon[!duplicated(CitationsIcon$Title), ]
CitationsRamaC   <- CitationsRama[!duplicated(CitationsRama$Title), ]
CitationsLearnC  <- CitationsLearn[!duplicated(CitationsLearn$Title), ]
CitationsBoubaC  <- CitationsBouba[!duplicated(CitationsBouba$Title), ]
CitationsTaketeC <- CitationsTakete[!duplicated(CitationsTakete$Title), ]

This removed 24 Duplicate Titles from CitationsPhon, 45 from CitationsSound, 96 from CitationsIcon, 0 from CitationsRama and 30 from CitationsLearn

We can now take these separate lists and start merging them back together. But to make this slightly easier, we’re going to be a bit hacky

First we remove duplicates in the full citations list, and drop the selection columns

# Master list: first occurrence of every title across all topic queries.
CitationsC <- Citations[!duplicated(Citations$Title), ]

# Drop the seven per-query flag columns; they are rebuilt per title below.
CitationsCS <- CitationsC[
  ,
  setdiff(
    names(CitationsC),
    c("Phonetic.Symbolism", "Sound.Symbolism", "Iconicity", "Learning",
      "RamaCite", "Bouba", "Takete")
  ),
  drop = FALSE
]

This removed 394 Duplicate Titles and 7 columns

Now we can start merging back in the other filtered data

# Re-attach each topic flag by title: match() gives, for every title in the
# master list, its row in the de-duplicated topic table (NA when the title is
# not in that topic), so the new column is 1 for members and NA otherwise.
CitationsCS$PhonSym  <- CitationsPhonC$Phonetic.Symbolism[match(CitationsCS$Title, CitationsPhonC$Title)]
CitationsCS$SoundSym <- CitationsSoundC$Sound.Symbolism[match(CitationsCS$Title, CitationsSoundC$Title)]
CitationsCS$Icon     <- CitationsIconC$Iconicity[match(CitationsCS$Title, CitationsIconC$Title)]
CitationsCS$Learning <- CitationsLearnC$Learning[match(CitationsCS$Title, CitationsLearnC$Title)]
CitationsCS$Rama     <- CitationsRamaC$RamaCite[match(CitationsCS$Title, CitationsRamaC$Title)]
CitationsCS$Bouba    <- CitationsBoubaC$Bouba[match(CitationsCS$Title, CitationsBoubaC$Title)]
CitationsCS$Takete   <- CitationsTaketeC$Takete[match(CitationsCS$Title, CitationsTaketeC$Title)]

Now let’s quickly write the output to a csv, “CitationsCorrected.csv”

# Save the cleaned, de-duplicated table next to the input data. The target is
# derived from mainPath (set at the top of the script) instead of a second
# hard-coded copy of the directory, so switching machines needs one edit only.
write.csv(CitationsCS, file.path(mainPath, "CitationsCorrected.csv"))

So, now what can we do with this data?

The first thing we can do is output some basic graphs of Publications by Year:

# Raw distribution of publication years.
hist(CitationsCS$Publication.Year)

# Smoothed version of the same distribution, drawn as a filled curve.
Publications <- density(CitationsCS$Publication.Year)
plot(x = Publications, main = "Kernel Density of Publications by Year")
polygon(x = Publications, col = "red", border = "black")

These graphs give you a pretty clear view of the upward trend in publications about Sound Symbolism and related topics, but how true are they to real trends? Obviously, the total number of publications in related fields has also gone up every year, so is there really an iconicity boom?

Let’s load in some other data and take a look:

# Yearly totals of psychology articles, used below as the baseline that the
# topic counts are divided by. The file sits in the same data directory as the
# citation export, so the path is built from mainPath rather than repeating
# the directory as another hard-coded literal.
PsychArticles <- read.csv(file.path(mainPath, "PsyArticles.csv"))

# Convert via character so that, should the column have been read as a factor,
# the printed value (not the internal level code) becomes the number.
PsychArticles$Articles <- as.numeric(as.character(PsychArticles$Articles))
# Year is used as a discrete axis in the plots.
PsychArticles$Year <- as.factor(PsychArticles$Year)

library(ggplot2)

# group = 1 joins all years into a single line even though Year is a factor.
ggplot(data=PsychArticles, aes(x=Year, y=Articles, group=1)) +
  geom_line(color="#aa0022", size=1.75) +
  geom_point(color="#aa0022", size=3.5) +
  scale_x_discrete(breaks=c("1900", "1910", "1920", "1930", "1940", "1950", "1960", "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Psychology Articles per Year (1900-2017)") +
  labs(x="", y="Articles Published") +
  theme(axis.title.y = element_text(size=14,  color="#666666")) +
  theme(axis.text = element_text(size=16)) +
  theme(plot.title = element_text(size=26, face="bold", hjust=0, color="#666666"))

So psychology papers in general have seen a pretty dramatic upsweep as well, to make that clear, here’s the same omnibus graph but for the Sound-symbolism-like topics:

library(plyr)

# plyr::count() on a vector yields one row per distinct year (column x) with
# its number of occurrences (column freq).
SSArticles <- count(CitationsCS$Publication.Year)
SSArticles$Articles <- as.numeric(SSArticles$freq)
SSArticles$Year <- as.factor(SSArticles$x)

library(ggplot2)

# Same layout as the psychology-wide plot, but for the topic articles only.
ggplot(SSArticles, aes(x = Year, y = Articles, group = 1)) +
  geom_line(colour = "#aa0022", size = 1.75) +
  geom_point(colour = "#aa0022", size = 3.5) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017)",
    x = "",
    y = "Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 14, color = "#666666"),
    axis.text = element_text(size = 16),
    plot.title = element_text(size = 26, face = "bold", hjust = 0, color = "#666666")
  )

The upticks look pretty similar - other than obviously there are far fewer Sound Symbolism related articles than there are Psychology ones in general- so to get an idea of how big the uptick actually is, we need to take our Sound Symbolism articles and “correct” them for the total number of Psychology articles each year (of note, we can do this for any “control” term- but ~80% of articles under the topics of interest fall under Psychology, Linguistics, or Neuroscience)

# Per-year working table: start from the psychology totals and keep them
# under the explicit name Psych.
Articles <- PsychArticles[, c("Year", "Articles")]
Articles$Psych <- Articles$Articles

# Topic article count for each year (NA for years with no topic article).
Articles$SS <- SSArticles$Articles[match(Articles$Year, SSArticles$Year)]

# Share of that year's psychology output; years without topic articles
# become 0 instead of NA.
Articles$PropSS <- with(Articles, SS / Psych)
Articles$PropSS <- replace(Articles$PropSS, is.na(Articles$PropSS), 0)

ggplot(Articles, aes(x = Year, y = PropSS, group = 1)) +
  geom_line(colour = "#aa0022", size = 1.75) +
  geom_point(colour = "#aa0022", size = 3.5) +
  geom_smooth(method = "loess", formula = y ~ x) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017)",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 14, color = "#666666"),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

So, it does indeed look like a veridical uptick, pretty clearly, even when we control for the fact that overall, the number of publications increases every year

But there are lots of other things we can still look at. First, let’s look at the trajectories of the uses of the various terms: Phonetic Symbolism vs. Sound Symbolism vs. Iconicity (note: from now on, because we know the uptick in publications is veridical, we won’t look at proportion-corrected data)

To make this simple, we’ll exclude articles that list more than one of the terms of interest

# A missing flag means "not in that topic": turn NA into 0 so the three flags
# can be added up.
CitationsCS$PhonSym  <- replace(CitationsCS$PhonSym, is.na(CitationsCS$PhonSym), 0)
CitationsCS$SoundSym <- replace(CitationsCS$SoundSym, is.na(CitationsCS$SoundSym), 0)
CitationsCS$Icon     <- replace(CitationsCS$Icon, is.na(CitationsCS$Icon), 0)

# Number of the three terms each article is listed under.
CitationsCS$Excluder <- with(CitationsCS, PhonSym + SoundSym + Icon)

# Keep only the articles listed under exactly one term.
TerminologyChange <- CitationsCS[which(CitationsCS$Excluder == 1), ]

This takes us from 1540 articles down to 1047 articles to be considered.

So let’s now take a look at the use of the various terms over time:

# Keep only the columns needed for the terminology comparison.
TerminologyChange <-subset(TerminologyChange, select = c(Title, Publication.Year, PhonSym, SoundSym, Icon))
library(tidyr)
# Build a single key "PhonSym-SoundSym-Icon" (e.g. "1-0-0") in two steps,
# keeping the source columns.
TerminologyChange <- unite(TerminologyChange, PS, PhonSym, SoundSym, sep = "-", remove = FALSE)
TerminologyChange <- unite(TerminologyChange, PSI, PS, Icon, sep = "-", remove = FALSE)

# Translate the key into a readable label. Only the three single-term keys can
# occur here because TerminologyChange was filtered to Excluder == 1.
TerminologyChange$Term <- mapvalues(TerminologyChange$PSI, 
                        from= c("1-0-0", "0-1-0", "0-0-1"),
                        to= c("Phonetic Symbolism", "Sound Symbolism", "Iconicity"))

TerminologyChange$Term<-as.factor(TerminologyChange$Term)

library(sm)

## Package 'sm', version 2.2-5.4: type help(sm) for summary information

# with() scopes the column lookup to this one call. The previous attach() left
# the data frame on the search path for the rest of the session, where its
# columns could silently mask or be masked by other objects. Using with()
# rather than TerminologyChange$... also keeps the bare column name that the
# plot derives its x-axis label from.
with(TerminologyChange,
     sm.density.compare(Publication.Year, Term, xlim=c(1900, 2010)))
# One legend colour per level, starting at colour 2 to line up with the
# colours sm.density.compare() uses by default. The earlier 2+(0:nlevels)
# produced one colour more than there are levels.
legend("topleft", levels(TerminologyChange$Term),
       fill = 1 + seq_len(nlevels(TerminologyChange$Term)))

So you can see “Sound Symbolism” starts as a term in the 1960s, then overtakes “Phonetic Symbolism” in the 1990s, where now it is the much more common of the two. “Iconicity” as a topic picks up right around the same time as Sound Symbolism does

What other things might we want to take a look at? How about the growth in mentions of Sound Symbolism for Learning

# Articles not listed under the Learning topic have NA here; code them as 0 and
# make the flag a factor so it can serve as the grouping variable.
CitationsCS$Learning[is.na(CitationsCS$Learning)] <- 0
CitationsCS$Learning <- as.factor(CitationsCS$Learning)


library(sm)

# Evaluate the call inside CitationsCS with with() rather than attach().
# attach() put a second data frame on the search path (masking Icon, PhonSym,
# Publication.Year, SoundSym and Title from the one attached earlier), and a
# global object named Learning is created further down, so on a re-run the
# bare name Learning would no longer refer to this column.
with(CitationsCS,
     sm.density.compare(Publication.Year, Learning, xlim=c(1900, 2010)))
# One legend colour per level (colours 2, 3, ...), instead of the previous
# 2+(0:nlevels), which yielded one colour too many.
legend("topleft", levels(CitationsCS$Learning),
       fill = 1 + seq_len(nlevels(CitationsCS$Learning)))

This doesn’t seem to show a whole lot: learning doesn’t seem to have special privilege, at least here. Is this because density plots are proportions? Let’s take a look at some raw figures

# Split the cleaned list by the Learning flag and tally publications per year.
LearningPlus  <- CitationsCS[which(CitationsCS$Learning == 1), ]
LearningMinus <- CitationsCS[which(CitationsCS$Learning == 0), ]

LearningPlusCount  <- count(LearningPlus$Publication.Year)
LearningMinusCount <- count(LearningMinus$Publication.Year)

# Align the tallies with the per-year table (NA where a year has no article).
Articles$Learning   <- LearningPlusCount$freq[match(Articles$Year, LearningPlusCount$x)]
Articles$NoLearning <- LearningMinusCount$freq[match(Articles$Year, LearningMinusCount$x)]

# Empty years count as 0; then express each count as a share of that year's
# psychology articles.
Articles$Learning   <- replace(Articles$Learning, is.na(Articles$Learning), 0) / Articles$Psych
Articles$NoLearning <- replace(Articles$NoLearning, is.na(Articles$NoLearning), 0) / Articles$Psych

Learning <- Articles[, c("Year", "Learning", "NoLearning")]

library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

# Long format: one row per (Year, series) with the proportion in `value`.
dfm <- melt(Learning, id.vars = "Year")
dfm$Learning <- as.factor(dfm$variable)

library(colorspace)
rainbowcols <- rainbow(7, s = 0.8)


ggplot(dfm, aes(x = Year, y = value, colour = Learning, group = Learning)) +
  #scale_color_manual(values=rainbowcols)+
  geom_smooth(aes(colour = Learning), size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point(aes(colour = Learning)) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017) ",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

So that actually looks pretty great- let’s give that technique a try for some of our other graphs

First let’s reproduce our changing terminology graphs, but this time using actual values, instead of Density Plots

First we’ll produce one that counts overlaps, i.e. if an article has both “sound symbolism” and “phonetic symbolism” as topics, it counts for both

# Slimmed-down copy with just the identifying columns and the topic flags.
CitationsCS2 <- CitationsCS[, c("Title", "Authors", "Publication.Year", "PhonSym",
                                "SoundSym", "Icon", "Learning", "Rama", "Bouba", "Takete")]
CitationsCS2$Rama <- replace(CitationsCS2$Rama, is.na(CitationsCS2$Rama), 0)

# Publications per year for each term; an article flagged for several terms is
# counted once under each of them.
PubsP <- count(CitationsCS2$Publication.Year[which(CitationsCS2$PhonSym == 1)])
PubsS <- count(CitationsCS2$Publication.Year[which(CitationsCS2$SoundSym == 1)])
PubsI <- count(CitationsCS2$Publication.Year[which(CitationsCS2$Icon == 1)])

# For each term: align with the per-year table, treat empty years as 0, and
# divide by that year's psychology total.
Articles$Phon <- PubsP$freq[match(Articles$Year, PubsP$x)]
Articles$Phon <- replace(Articles$Phon, is.na(Articles$Phon), 0) / Articles$Psych

Articles$Sound <- PubsS$freq[match(Articles$Year, PubsS$x)]
Articles$Sound <- replace(Articles$Sound, is.na(Articles$Sound), 0) / Articles$Psych

Articles$Icon <- PubsI$freq[match(Articles$Year, PubsI$x)]
Articles$Icon <- replace(Articles$Icon, is.na(Articles$Icon), 0) / Articles$Psych

# Long format for plotting, with readable series labels.
TermOverlap <- Articles[, c("Year", "Phon", "Sound", "Icon")]
TOMolten <- melt(TermOverlap, id.vars = "Year")
TOMolten$Terminology <- as.factor(TOMolten$variable)

TOMolten$Terminology <- mapvalues(
  TOMolten$Terminology,
  from = c("Phon", "Sound", "Icon"),
  to = c("Phonetic Symbolism", "Sound Symbolism", "Iconicity")
)

ggplot(TOMolten, aes(x = Year, y = value, colour = Terminology, group = Terminology)) +
  geom_smooth(aes(colour = Terminology), size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point(aes(colour = Terminology)) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017) ",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

Still looks really nice- the ‘loess’ function isn’t super kind to phonetic symbolism- i.e. the line that it draws really ignores that at one point there was relative to the field a much larger number of ‘phonetic symbolism’ articles

Now, let’s take a look at the terminology stuff again, but this time taking out the overlapping bits

# Collapse the three topic flags into one key "PhonSym-SoundSym-Icon"
# (e.g. "1-0-1"), keeping the original flag columns.
CitationsCS2 <- unite(CitationsCS2, PS, PhonSym, SoundSym, sep = "-", remove = FALSE)
CitationsCS2 <- unite(CitationsCS2, PSI, PS, Icon, sep = "-", remove = FALSE)

# Readable label for every possible flag combination.
CitationsCS2$Terminology <- mapvalues(CitationsCS2$PSI, 
                        from= c("1-1-1", "1-1-0", "1-0-0", "1-0-1", "0-0-0", "0-0-1", "0-1-0", "0-1-1"),
                        to= c("PS + SS + Icon", "PS + SS", "PS", "PS + Icon", "None", "Icon", "SS", "SS + Icon"))

# The same "tally per year, align, zero-fill, normalise" steps were previously
# written out once per label; the two helpers below hold that logic in a
# single place.

# Publications per year (plyr::count: columns x and freq) among the rows of
# `citations` whose Terminology equals `term`.
count_term_years <- function(citations, term) {
  count(citations$Publication.Year[which(citations$Terminology == term)])
}

# Turn a per-year tally into a vector aligned with `articles$Year`: years
# missing from the tally count as 0, and every count is divided by that year's
# psychology total (`articles$Psych`).
share_of_psych <- function(year_counts, articles) {
  freq <- year_counts$freq[match(articles$Year, year_counts$x)]
  freq[is.na(freq)] <- 0
  freq / articles$Psych
}

PubsP2  <- count_term_years(CitationsCS2, "PS")
PubsS2  <- count_term_years(CitationsCS2, "SS")
PubsI2  <- count_term_years(CitationsCS2, "Icon")
PubsPS  <- count_term_years(CitationsCS2, "PS + SS")
PubsPI  <- count_term_years(CitationsCS2, "PS + Icon")
PubsSI  <- count_term_years(CitationsCS2, "SS + Icon")
PubsPSI <- count_term_years(CitationsCS2, "PS + SS + Icon")

Articles$PS    <- share_of_psych(PubsP2, Articles)
# NOTE: Articles$SS held the raw yearly topic count up to this point; from
# here on it holds the proportion of "Sound Symbolism only" articles.
Articles$SS    <- share_of_psych(PubsS2, Articles)
Articles$I     <- share_of_psych(PubsI2, Articles)
Articles$PSSS  <- share_of_psych(PubsPS, Articles)
Articles$PSI   <- share_of_psych(PubsPI, Articles)
Articles$SSI   <- share_of_psych(PubsSI, Articles)
Articles$PSSSI <- share_of_psych(PubsPSI, Articles)

# Long format for plotting, with readable series labels.
TermNoOverlap <- subset(Articles, select = c(Year, PS, SS, I, PSSS, PSI, SSI, PSSSI))
TNMolten <- melt(TermNoOverlap, id.vars = "Year")
TNMolten$Terminology <- as.factor(TNMolten$variable)

TNMolten$Terminology <- mapvalues(TNMolten$Terminology, 
                        from= c("PS", "SS", "I", "PSSS", "PSI", "SSI", "PSSSI"),
                        to= c("Phonetic Symbolism", "Sound Symbolism", "Iconicity", "Phonetic Symbolism + Sound Symbolism", "Phonetic Symbolism + Iconicity",  "Sound Symbolism + Iconicity", "All"))

ggplot(TNMolten, aes(x = Year, y = value, colour = Terminology, group= Terminology)) + 
  geom_smooth(aes(colour = Terminology),size = 1.5,se = FALSE, method= 'loess', formula =  y~x)+
  geom_point(aes(col = Terminology)) +
  scale_x_discrete(breaks=c("1900", "1910", "1920", "1930", "1940", "1950", "1960", "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (1900-2017) ") +
  labs(x="", y="Corrected Proportion of Articles Published") +
  theme(axis.title.y = element_text(size=12,  color="#666666")) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666"))

So that produces a graph, but it’s pretty ugly, mostly because the combinations of various terms in the same papers are relatively late to arrive to the party. So let’s look at the basic terms and their combinations on separate graphs

First for only the Basic Topics

# Series for articles listed under exactly one term.
TNMoltenBasic <- TNMolten[which(TNMolten$Terminology %in%
                                  c("Phonetic Symbolism", "Sound Symbolism", "Iconicity")), ]

ggplot(TNMoltenBasic, aes(x = Year, y = value, colour = Terminology, group = Terminology)) +
  geom_smooth(aes(colour = Terminology), size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point(aes(colour = Terminology)) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017) ",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

# Series for articles listed under two or all three terms.
TNMoltenCombo <- TNMolten[which(TNMolten$Terminology %in%
                                  c("Phonetic Symbolism + Sound Symbolism",
                                    "Sound Symbolism + Iconicity",
                                    "Phonetic Symbolism + Iconicity",
                                    "All")), ]

ggplot(TNMoltenCombo, aes(x = Year, y = value, colour = Terminology, group = Terminology)) +
  geom_smooth(aes(colour = Terminology), size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point(aes(colour = Terminology)) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017) ",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

Awesome- we can see that papers using a combination of terms are a relatively new development.

Now let’s quickly output a slightly prettier version of the omnibus Corrected Publication Rate. Note: this is also a more accurate graph, because the first one we output contained articles that cited Ramachandran and Hubbard but did not fall under any of the topics we were interested in

# Drop articles that carry none of the three topic flags.
CitationsCS3 <- subset(CitationsCS2, PSI != "0-0-0")

# Publications per year among the remaining articles.
SSArticles2 <- count(CitationsCS3$Publication.Year)
SSArticles2$Articles <- as.numeric(as.character(SSArticles2$freq))
SSArticles2$Year <- as.factor(SSArticles2$x)

# Align with the per-year table and express as a share of psychology articles;
# years with no topic article become 0.
Articles$SS2 <- SSArticles2[match(Articles$Year, SSArticles2$Year),]$Articles

Articles$PropSS2 <- Articles$SS2 / Articles$Psych
Articles$PropSS2[is.na(Articles$PropSS2)] <- 0

# The colour is a fixed value, so it is passed as a layer parameter. Wrapping
# the constant in aes() mapped it through the discrete colour scale (the string
# acted as a category label, not as the colour) and created a one-entry legend
# that then had to be hidden; with a fixed colour no legend is produced.
ggplot(Articles, aes(x = Year, y = PropSS2, group = 1)) + 
  geom_smooth(colour = "#f8766d", size = 1.5, se = FALSE, method= 'loess', formula =  y~x)+
  geom_point(colour = "#f8766d") +
  scale_x_discrete(breaks=c("1900", "1910", "1920", "1930", "1940", "1950", "1960", "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (1900-2017) ") +
  labs(x="", y="Corrected Proportion of Articles Published") +
  theme(axis.title.y = element_text(size=12,  color="#666666")) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666"))

We have that prettied up- what else can we take a look at?

Finally, we can take a look at the influence of Ramachandran and Hubbard (2001)

# Publications per year, split by the Rama flag (1 = cites Ramachandran and
# Hubbard (2001), 0 = does not).
PubsRama   <- count(CitationsCS3$Publication.Year[which(CitationsCS3$Rama == 1)])
PubsNoRama <- count(CitationsCS3$Publication.Year[which(CitationsCS3$Rama == 0)])

# Align with the per-year table, zero-fill empty years, divide by the
# psychology total.
Articles$SSRama <- PubsRama$freq[match(Articles$Year, PubsRama$x)]
Articles$SSRama <- replace(Articles$SSRama, is.na(Articles$SSRama), 0) / Articles$Psych

Articles$SSNoRama <- PubsNoRama$freq[match(Articles$Year, PubsNoRama$x)]
Articles$SSNoRama <- replace(Articles$SSNoRama, is.na(Articles$SSNoRama), 0) / Articles$Psych

# Long format for plotting, with readable series labels.
Rama <- Articles[, c("Year", "SSRama", "SSNoRama")]
RamaMolten <- melt(Rama, id.vars = "Year")
RamaMolten$Citation <- as.factor(RamaMolten$variable)

RamaMolten$Citation <- mapvalues(
  RamaMolten$Citation,
  from = c("SSRama", "SSNoRama"),
  to = c("Cites R+H (2001)", "Does Not Cite R+H (2001)")
)

ggplot(RamaMolten, aes(x = Year, y = value, colour = Citation, group = Citation)) +
  geom_smooth(aes(colour = Citation), size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point(aes(colour = Citation)) +
  scale_x_discrete(breaks = as.character(c(seq(1900, 2010, by = 10), 2017))) +
  labs(
    title = "Sound Symbolism Articles per Year (1900-2017) ",
    x = "",
    y = "Corrected Proportion of Articles Published"
  ) +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

So that doesn’t quite seem to tell the same story as previous colorings of my graphs- why is that the case?

Is it just that papers don’t all cite Ramachandran and Hubbard (2001) despite being influenced by it (through other papers), or is it really not as formative a paper as I had assumed?

Maybe it’s just that the “Iconicity” literature isn’t citing R+H, but the Sound/Phonetic Symbolism literature is- let’s check that out

# Restrict to papers whose PSI key is "1-1-0", "1-0-0" or "0-1-0".
CitationsCS4 <- subset(CitationsCS3, PSI %in% c("1-1-0", "1-0-0", "0-1-0"))

# Per-year paper tallies, split on the Rama flag (1 vs 0).
PubsRama2 <- count(with(CitationsCS4, Publication.Year[which(Rama == 1)]))
PubsNoRama2 <- count(with(CitationsCS4, Publication.Year[which(Rama == 0)]))

# Align the tallies to Articles$Year; unmatched years become 0, and the
# result is divided by Articles$Psych.
Articles$SSRama2 <- PubsRama2$freq[match(Articles$Year, PubsRama2$x)]
Articles$SSRama2 <- replace(Articles$SSRama2, is.na(Articles$SSRama2), 0) / Articles$Psych

Articles$SSNoRama2 <- PubsNoRama2$freq[match(Articles$Year, PubsNoRama2$x)]
Articles$SSNoRama2 <- replace(Articles$SSNoRama2, is.na(Articles$SSNoRama2), 0) / Articles$Psych

# Long format for plotting, with readable legend labels in Citation.
Rama2 <- subset(Articles, select = c(Year, SSRama2, SSNoRama2))
Rama2Molten <- melt(Rama2, id.vars = "Year")

Rama2Molten$Citation <- mapvalues(
  as.factor(Rama2Molten$variable),
  from = c("SSRama2", "SSNoRama2"),
  to = c("Cites R+H (2001)", "Does Not Cite R+H (2001)")
)

# Same layout as the previous figure, drawn from Rama2Molten: points plus a
# loess smoother per Citation series, no confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(Rama2Molten, aes(x = Year, y = value, colour = Citation, group = Citation)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("1900", "1910", "1920", "1930", "1940", "1950", "1960",
                              "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (1900-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

So that looks much more sensible – let’s just replot it with fewer years. R+H came out in 2001, so years before that aren’t really relevant.

# Copy of Rama2Molten restricted to recent years.
Rama3Molten <- Rama2Molten

# Convert via character so that, if Year is a factor, the year labels (not
# the internal level codes) are compared.
Rama3Molten$Year <- as.numeric(as.character(Rama3Molten$Year))
# Keep 2000 itself: the figure built from this data is titled "(2000-2017)"
# and asks for an axis break at "2000", which `Year > 2000` silently dropped.
Rama3Molten <- subset(Rama3Molten, Year >= 2000)
# Back to a factor, since the plot uses a discrete x scale.
Rama3Molten$Year <- as.factor(Rama3Molten$Year)


# Recent-years view of the two citation series: points plus a loess smoother
# per series, no confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(Rama3Molten, aes(x = Year, y = value, colour = Citation, group = Citation)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("2000", "2005", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (2000-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

So maybe there is something to the influence of R+H. Maybe this is only the case for Bouba-Kiki/Takete-Maluma papers, and not for sound-symbolism work more broadly (although you could argue that Bouba-Kiki papers have led to renewed interest in sound symbolism).

# Treat a missing Bouba / Takete flag as 0.
CitationsCS2$Bouba[is.na(CitationsCS2$Bouba)] <- 0
CitationsCS2$Takete[is.na(CitationsCS2$Takete)] <- 0

# BT is the combined "<Bouba>-<Takete>" key (e.g. "1-0"); remove = FALSE keeps
# the two source columns.
CitationsCS2 <- unite(CitationsCS2, BT, Bouba, Takete, sep = "-", remove = FALSE)

# BKPapers: 0 for the "0-0" key, 1 for every key with at least one flag set.
CitationsCS2$BKPapers <- mapvalues(CitationsCS2$BT,
                                   from = c("0-0", "0-1", "1-0", "1-1"),
                                   to = c(0, 1, 1, 1))

# BKTerm: terminology label for each key. Stored as a column of CitationsCS2,
# parallel to BKPapers above; the original assigned it to a free-standing
# variable named `CitationsCS2BKTerm` (missing `$`), so the labels never
# reached the data frame.
CitationsCS2$BKTerm <- mapvalues(CitationsCS2$BT,
                                 from = c("0-0", "0-1", "1-0", "1-1"),
                                 to = c("None", "Takete-Maluma", "Bouba-Kiki", "Both"))

# Keep only the papers with at least one of the two flags set.
BKPapers <- subset(CitationsCS2, BKPapers != 0)

# Per-year tallies of those papers, split on the Rama flag (1 vs 0).
PubsBKRama <- count(subset(BKPapers, select = c(BKPapers, Publication.Year), Rama == 1)$Publication.Year)
PubsBKNoRama <- count(subset(BKPapers, select = c(BKPapers, Publication.Year), Rama == 0)$Publication.Year)

# Align the tallies to Articles$Year; unmatched years become 0, and the
# result is divided by Articles$Psych.
Articles$BKRama <- PubsBKRama[match(Articles$Year, PubsBKRama$x), ]$freq
Articles$BKRama[is.na(Articles$BKRama)] <- 0
Articles$BKRama <- Articles$BKRama / Articles$Psych

Articles$BKNoRama <- PubsBKNoRama[match(Articles$Year, PubsBKNoRama$x), ]$freq
Articles$BKNoRama[is.na(Articles$BKNoRama)] <- 0
Articles$BKNoRama <- Articles$BKNoRama / Articles$Psych

# Long format for plotting. Note that this reuses (overwrites) the name
# Rama3Molten from the earlier recent-years subset.
Rama3 <- subset(Articles, select = c(Year, BKRama, BKNoRama))
Rama3Molten <- melt(Rama3, id.vars = "Year")
Rama3Molten$Citation <- as.factor(Rama3Molten$variable)

Rama3Molten$Citation <- mapvalues(Rama3Molten$Citation,
                                  from = c("BKRama", "BKNoRama"),
                                  to = c("Cites R+H (2001)", "Does Not Cite R+H (2001)"))

# Rama3Molten (BKRama / BKNoRama series): points plus a loess smoother per
# Citation series, no confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(Rama3Molten, aes(x = Year, y = value, colour = Citation, group = Citation)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("1900", "1910", "1920", "1930", "1940", "1950", "1960",
                              "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (1900-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

What about Bouba-Kiki vs. Takete-Maluma?

# Per-year paper tallies for each BT key: "1-0", "0-1" and "1-1"
# (labelled Bouba-Kiki, Takete-Maluma and Both further down).
PubsBK <- count(with(BKPapers, Publication.Year[which(BT == "1-0")]))
PubsTM <- count(with(BKPapers, Publication.Year[which(BT == "0-1")]))
PubsBKTM <- count(with(BKPapers, Publication.Year[which(BT == "1-1")]))

# Align each tally to Articles$Year; unmatched years become 0, and the
# result is divided by Articles$Psych.
Articles$BK <- PubsBK$freq[match(Articles$Year, PubsBK$x)]
Articles$BK <- replace(Articles$BK, is.na(Articles$BK), 0) / Articles$Psych

Articles$TM <- PubsTM$freq[match(Articles$Year, PubsTM$x)]
Articles$TM <- replace(Articles$TM, is.na(Articles$TM), 0) / Articles$Psych

Articles$BKTM <- PubsBKTM$freq[match(Articles$Year, PubsBKTM$x)]
Articles$BKTM <- replace(Articles$BKTM, is.na(Articles$BKTM), 0) / Articles$Psych

# Long format for plotting, with readable legend labels in Terminology.
BKTerm <- subset(Articles, select = c(Year, BK, TM, BKTM))
BKTermMolten <- melt(BKTerm, id.vars = "Year")

BKTermMolten$Terminology <- mapvalues(
  as.factor(BKTermMolten$variable),
  from = c("BK", "TM", "BKTM"),
  to = c("Bouba-Kiki", "Takete-Maluma", "Both")
)

# One series per Terminology label: points plus a loess smoother, no
# confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(BKTermMolten, aes(x = Year, y = value, colour = Terminology, group = Terminology)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("1900", "1910", "1920", "1930", "1940", "1950", "1960",
                              "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (1900-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

Groovy, but lots of useless graph - let’s look only at modern papers

# Copy of BKTermMolten restricted to recent years.
BKTermMolten2 <- BKTermMolten

# Convert via character so that, if Year is a factor, the year labels (not
# the internal level codes) are compared.
BKTermMolten2$Year <- as.numeric(as.character(BKTermMolten2$Year))
# Keep 2000 itself: the figure built from this data is titled "(2000-2017)"
# and asks for an axis break at "2000", which `Year > 2000` silently dropped.
BKTermMolten2 <- subset(BKTermMolten2, Year >= 2000)
# Back to a factor, since the plot uses a discrete x scale.
BKTermMolten2$Year <- as.factor(BKTermMolten2$Year)


# Recent-years view of the terminology series: points plus a loess smoother
# per Terminology label, no confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(BKTermMolten2, aes(x = Year, y = value, colour = Terminology, group = Terminology)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("2000", "2005", "2010", "2017")) +
  ggtitle("Sound Symbolism Articles per Year (2000-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

Now let’s look at some of the other influences on the Iconicity Boom

# Per-year data for five related topics, one CSV per topic.
# Paths are built from mainPath (the data directory assigned at the top of
# this document) instead of repeating the absolute directory five times, so
# moving to another machine or drive is a one-line change.
EmbodiedArticles <- read.csv(file.path(mainPath, "embodiedcog.csv"))
SignArticles <- read.csv(file.path(mainPath, "signlanguage.csv"))
IdeophoneArticles <- read.csv(file.path(mainPath, "ideophones.csv"))
SynesthesiaArticles <- read.csv(file.path(mainPath, "synesthesia.csv"))
CrossmodalityArticles <- read.csv(file.path(mainPath, "crossmodality.csv"))

# Copy each topic's value column into Articles, aligned on Year
# (years absent from a topic file come through as NA).
Articles$Emb <- EmbodiedArticles[match(Articles$Year, EmbodiedArticles$Year), ]$Embodied
Articles$Sign <- SignArticles[match(Articles$Year, SignArticles$Year), ]$SignLanguage
Articles$Ideo <- IdeophoneArticles[match(Articles$Year, IdeophoneArticles$Year), ]$Ideophones
Articles$Syn <- SynesthesiaArticles[match(Articles$Year, SynesthesiaArticles$Year), ]$Synesthesia
Articles$Xmod <- CrossmodalityArticles[match(Articles$Year, CrossmodalityArticles$Year), ]$Crossmodality

# Divide by Articles$Psych, then set any NA result to 0.
Articles$Emb <- Articles$Emb / Articles$Psych
Articles$Emb[is.na(Articles$Emb)] <- 0

Articles$Sign <- Articles$Sign / Articles$Psych
Articles$Sign[is.na(Articles$Sign)] <- 0

Articles$Ideo <- Articles$Ideo / Articles$Psych
Articles$Ideo[is.na(Articles$Ideo)] <- 0

Articles$Syn <- Articles$Syn / Articles$Psych
Articles$Syn[is.na(Articles$Syn)] <- 0

Articles$Xmod <- Articles$Xmod / Articles$Psych
Articles$Xmod[is.na(Articles$Xmod)] <- 0

# Long format (one row per Year and topic) for plotting; Icon is the
# existing Articles column that is plotted alongside the five new ones.
Booms <- subset(Articles, select = c(Year, Icon, Emb, Sign, Ideo, Syn, Xmod))
BoomsMolten <- melt(Booms, id.vars = "Year")

BoomsMolten$Boom <- as.factor(BoomsMolten$variable)

# Boom carries the readable legend / facet labels.
BoomsMolten$Boom <- mapvalues(BoomsMolten$Boom,
                              from = c("Icon", "Emb", "Sign", "Ideo", "Syn", "Xmod"),
                              to = c("Iconicity", "Embodied Cognition", "Sign Language", "Ideophones", "Synesthesia", "Crossmodality"))

# All topics on shared axes: points plus a loess smoother per Boom series,
# no confidence band.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(BoomsMolten, aes(x = Year, y = value, colour = Boom, group = Boom)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("1900", "1910", "1920", "1930", "1940", "1950", "1960",
                              "1970", "1980", "1990", "2000", "2010", "2017")) +
  ggtitle("Articles per Year (1900-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  )

# Same data as the previous figure, one panel per topic in two columns.
# scales = "free_y" gives each panel its own y range.
# Layers inherit colour/group from the plot-level aes(); FALSE replaces the
# reassignable shorthand F.
ggplot(BoomsMolten, aes(x = Year, y = value, colour = Boom, group = Boom)) +
  geom_smooth(size = 1.5, se = FALSE, method = "loess", formula = y ~ x) +
  geom_point() +
  scale_x_discrete(breaks = c("1900", "1925", "1950", "1975", "2000", "2017")) +
  ggtitle("Articles per Year (1900-2017) ") +
  labs(x = "", y = "Corrected Proportion of Articles Published") +
  theme(
    axis.title.y = element_text(size = 12, color = "#666666"),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 16, face = "bold", hjust = 0, color = "#666666")
  ) +
  facet_wrap(~ Boom, ncol = 2, scales = "free_y")

Iconicity and Learning Analysis

Alan Nielsen

August 15, 2017

Reading in the data