Here’s an overview of all the packages we will need:
library(tidyverse)
library(readxl) #for reading in separate sheets from excel
library(plyr) #for summary tables throughout
library(psych) #for barletts test
library(FactoMineR) #for PCA and HCPC
library(factoextra) #for pretty graphs of HCPC
library(reshape2) #for other data wrangling
library(lme4) #for linear mixed regressions
library(lmerTest) #for pvalues from lmes
library(ggplot2) #for all plots
library(ggcorrplot)#for correlation plots
library(ggalt) #for dumbbell plots
library(cowplot) #for combining plots #https://cran.r-project.org/web/packages/cowplot/vignettes/plot_grid.html
Suggested to skip this section and head right to “General descriptive stats” using the navigation tab on the left
Lets load the data into R. Then we will combine them into a single data file. ** Load in the existing datasets (Note that column name standardization and ordering was performed in excel):
nts.import <- read_excel("CrossLgReferenceData.xlsx", sheet = "NSL",range = cell_cols("A:G"))
auslan.import <- read_excel("CrossLgReferenceData.xlsx", sheet = "Auslan",range = cell_cols("A:G"))
finsl.import <- read_excel("CrossLgReferenceData.xlsx", sheet = "FinSL",range = cell_cols("A:G"))
ssl.import <- read_excel("CrossLgReferenceData.xlsx", sheet = "SSL",range = cell_cols("A:G"))
isl.import <- read_excel("CrossLgReferenceData.xlsx", sheet = "ISL",range = cell_cols("A:G"))
master <- rbind(nts.import, auslan.import, finsl.import, ssl.import, isl.import) # Combine
master <- master %>% mutate_if(is.character,as.factor) # change characters to factors
rm(nts.import, auslan.import, finsl.import, ssl.import, isl.import) # Remove import objects
master <- master[complete.cases(master), ] # Remove NA values
# levels(master$NarrativeReferent)
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyf"] <- "boy"
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyfNEW"] <- "boyNEW"
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyfREIN"] <- "boyREIN"
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyf?"] <- "boy?"
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyf?NEW"] <- "boy?NEW"
levels(master$NarrativeReferent)[levels(master$NarrativeReferent)=="boyf?REIN"] <- "boy?REIN"
# ^^ these aren't actually typos, for the record
master1 <- subset(master, NarrativeReferent %in%
c("boy","boyNEW","boyREIN",#"boy?","boy?NEW","boy?REIN",
"dog","dogNEW","dogREIN",#"dog?","dog?NEW","dog?REIN",
"frog","frogNEW","frogREIN",#"frog?","frog?NEW","frog?REIN",
"bees","beesNEW","beesREIN", #"bees?","bees?NEW","bees?REIN",
"owl","owlNEW","owlREIN",#"owl?","owl?NEW","owl?REIN",
"deer","deerNEW","deerREIN",#"deer?","deer?NEW","deer?REIN",
"jar","jarNEW","jarREIN",#"jar?","jar?NEW","jar?REIN",
"boots","bootsNEW","bootsREIN",#"boots?","boots?NEW","boots?REIN",
"window","windowNEW","windowREIN",#"window?","window?NEW","window?REIN",
"hive","hiveNEW","hiveREIN",#"hive?","hive?NEW","hive?REIN",
"rock","rockNEW","rockREIN")) #,"rock?","rock?NEW","rock?REIN",))
master1$NarrativeReferent <- factor(master1$NarrativeReferent) #remove empty levels
levels(master1$NarrativeReferent) #list levels
## [1] "bees" "beesNEW" "beesREIN" "boots" "bootsNEW"
## [6] "boy" "boyNEW" "boyREIN" "deer" "deerNEW"
## [11] "deerREIN" "dog" "dogNEW" "dogREIN" "frog"
## [16] "frogNEW" "frogREIN" "hive" "hiveNEW" "hiveREIN"
## [21] "jar" "jarNEW" "jarREIN" "owl" "owlNEW"
## [26] "owlREIN" "rock" "rockNEW" "rockREIN" "window"
## [31] "windowNEW" "windowREIN"
clean <- function(input) {
names = names(input)
for (i in 1:nrow(input)) {
row = input[i,]
split = unlist(strsplit(as.character(row$SignType), "[.]"))
components = length(split)
input[i,"component.count"] = components
for (j in 1:components) {
# take the component from the sign type, i.e. "ds" and turn it into a valid R
# identifier prefixed with "type.", i.e. "type.ds". "?" is not valid in R
# identifiers, so it gets replaced with ".q", so "mouth?" becomes "type.mouth.q".
name = split[j]
#name = paste("type", name, sep=".") #i removed this line because it cluttered the column names
name = sub("\\.?\\?$", ".q", name, perl=TRUE)
if (!name %in% names) {
input[name] = 0
names <- c(names, name)
}
input[i,name] = input[i,name] + 1
}
}
input
}
list <- 1:nrow(master1)
master1 <- cbind(list=list, master1)
temp.frame <- subset(master1, select = c("list","SignType"))
cleaned <- clean(temp.frame)
colnames(cleaned)[1] <- "list"
master.df1 <- merge(cleaned, master1, all.x = T)
# str(master.df1)
master.df1$SignType <- gsub("(CA)", "ca",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(WhDS)", "WHds",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(Whds)", "WHds",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(.gest)", ".gesture",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(.lenxp)", ".lexnp",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(mouthing)", "mouth",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(mouth ds)", "mouth.ds",master.df1$SignType, perl=TRUE)
master.df1$SignType <- gsub("(WH)", "WH.",master.df1$SignType, perl=TRUE)
master.df1$SignType <- factor(master.df1$SignType) #remove empty levels
temp.frame2 <- subset(master.df1, select = c("list","SignType"))
master.frame <- subset(master.df1, select =c("list", "Language", "Filename", "ClauseLikeUtterance", "NarrativeReferent", "Animacy", "Activation"))
cleaned2 <- clean(temp.frame2)
colnames(cleaned2)[1] <- "list"
master.df2 <- merge(cleaned2, master.frame, all.x = T)
master.df2 <- master.df2[ ,!(colnames(master.df2) == "1")] #remove the pesky "1" from the SignType coding
#master.df3 <- master.df2[, -which(names(master.df2) %in% c("WHlex", "WHds", "WHpt","WhDS","lenxp","WHlexnp","CA","gest"))]
master.df3 <- master.df2
# summary(master.df3$WH) #check merge worked correctly
# summary(master.df3$Language) #check merge worked correctly
#collapse all .q columns to one column (12)
master.df3$q <- master.df3$fs.q + master.df3$ds.q + master.df3$IN.q + master.df3$ca.q + master.df3$mouth.q + master.df3$.q
uncertain <- master.df3[, -which(names(master.df3) %in% c("fs.q", "ds.q", "IN.q","ca.q","mouth.q",".q","is.q","lex.q","pt.q","lexnp.q"))]
# data <- uncertain[c("list","Filename","Language","NarrativeReferent","Animacy","Activation","component.count","lex","mouth","ca","pt","fs","IN","WH","ds","lexnp","is","dl","gesture","q")] #old non-tidy way of selecting columns...
data <- uncertain %>%
select(list,Filename,Language,NarrativeReferent,Animacy,Activation,component.count,lex,mouth,ca,pt,fs,IN,WH,ds,lexnp,is,dl,gesture,q)
rm(clean,cleaned, cleaned2, list, master, master.df1,master.df2, master.df3, master1, temp.frame,temp.frame2, uncertain)
# levels(data$Activation)
data$Activation <- mapvalues(data$Activation, from = c("NEW", "MAINTENANCE","REIN","?","UNCERTAIN"), to = c("New", "Maintained", "Reintroduced","Uncertain","Uncertain"))
data <- subset(data, !Activation %in% c("Uncertain"))
# levels(data$Animacy)
data$Animacy <- mapvalues(data$Animacy, from = c("ANIMATE", "INANIMATE","BOTH","?"), to = c("Animate", "Inanimate","Both","Uncertain"))
data <- subset(data, !Animacy %in% c("Both"))
data$Animacy <- factor(data$Animacy)
data$Activation <- factor(data$Activation)
data$NarrativeReferent <- factor(data$NarrativeReferent)
# `r nrow(subset(data, Language == "Auslan"))` to report inline
## Language Activation Animacy count proportion strategies
## 1 FinSL Maintained Animate 299 0.50 1.836120
## 2 FinSL Maintained Inanimate 64 0.11 1.828125
## 3 FinSL New Animate 28 0.05 2.392857
## 4 FinSL New Inanimate 22 0.04 3.000000
## 5 FinSL Reintroduced Animate 154 0.26 2.493506
## 6 FinSL Reintroduced Inanimate 33 0.06 1.878788
## Language Activation Animacy count proportion strategies
## 1 ISL Maintained Animate 348 0.52 1.614943
## 2 ISL Maintained Inanimate 39 0.06 1.641026
## 3 ISL New Animate 29 0.04 2.793103
## 4 ISL New Inanimate 22 0.03 2.954545
## 5 ISL Reintroduced Animate 202 0.30 2.485149
## 6 ISL Reintroduced Inanimate 33 0.05 1.818182
## Language Activation Animacy count proportion strategies
## 1 NSL Maintained Animate 380 0.55 1.413158
## 2 NSL Maintained Inanimate 55 0.08 1.636364
## 3 NSL New Animate 28 0.04 2.285714
## 4 NSL New Inanimate 18 0.03 2.888889
## 5 NSL Reintroduced Animate 187 0.27 2.090909
## 6 NSL Reintroduced Inanimate 24 0.03 2.333333
## Language Activation Animacy count proportion strategies
## 1 SSL Maintained Animate 276 0.51 1.463768
## 2 SSL Maintained Inanimate 39 0.07 1.923077
## 3 SSL New Animate 25 0.05 2.560000
## 4 SSL New Inanimate 21 0.04 3.571429
## 5 SSL Reintroduced Animate 149 0.27 2.389262
## 6 SSL Reintroduced Inanimate 32 0.06 2.281250
data_long2 <- melt(data,
# ID variables - all the variables to keep but not split apart on
id.vars=c("Language", "Activation","Animacy","Filename", "NarrativeReferent"),
# The source columns
measure.vars=c("lex", "mouth", "ca", "pt", "fs", "IN","WH", "ds", "lexnp", "is", "dl" ,"gesture", "q" ),
# Name of the destination column that will identify the original
# column that the measurement came from
variable.name="strategy",
value.name="occurance")
# head(data_long2)
long.table2 <- ddply(data_long2, c("Filename", "Language", "Activation","Animacy","strategy"),summarise, occurance = round(mean(occurance),4))
# long.table2
long.table2$occ.c <- long.table2$occurance - mean(long.table2$occurance) #center
long.table2$occ.z <- long.table2$occ.c / sd(long.table2$occurance) #standardize
contrasts(data$Animacy) <- contr.sum(2)
contrasts(data$Animacy)
# head(long.table2)
long.table2$Activation2 <- factor(long.table2$Activation, levels = c("Maintained", "Reintroduced","New"))
# levels(long.table2$Activation2)
long.table2$Language2 <- factor(long.table2$Language, levels = c("Auslan","SSL","NSL","ISL", "FinSL"))
# levels(long.table2$Language2)
# head(long.table2)
#ideal model
# occ.m0 <- lmer(occ.z ~ strategy * Language2 + Activation + Animacy + (1+strategy*Language2|Filename), data = long.table2) #careful this takes about > 2hours
# summary(occ.m0)
occ.m1 <- lmer(occ.z ~ strategy * Language2 + Activation + Animacy + (1+Language2|Filename), data = long.table2)
summary(occ.m1)
I also tried grouping semiotic strats to simplify the model, analysis not displayed in this markup.
However, results indicate that we can justify exploratory analysis
Heatmaps are my new favorite plot type
Now that we are confident we can include all our semiotic strategies in exploratory analysis we start looking at cooccurances. First confirm that the data is enough coorrelated to warrent a cluster analysis.
auslan <- subset(data, Language == "Auslan")
auslan.occurances <- ddply(auslan, c("NarrativeReferent"),summarise,
lex = mean(lex),lexnp = mean(lexnp), fs = mean(fs), mouth = mean(mouth),
dl = mean(dl), ds = mean(ds), pt = mean(pt), is = mean(is),
gest = mean(gesture), ca = mean(ca),IN = mean(IN),
wh = mean(WH), q = mean(q))
#auslan.occurances1 <- subset(occurances, select = -c(num)) #remove those with low instances
auslan.cortab <- round(cor(auslan.occurances[, 2:(ncol(auslan.occurances))], method="spearman"),2)^2 #spearman correlation has no assumptions about the distribution of the data
auslan.occurances2 <- subset(auslan.occurances, select = -c(NarrativeReferent))
auslan.corr <- round(cor(auslan.occurances2), 1)
ggcorrplot(auslan.corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of Auslan Semiotic strategies",
ggtheme=theme_bw)
auslan.cortab <- as.data.frame(round(cor(auslan.occurances[, 2:(ncol(auslan.occurances))], method="spearman"),2)^2)
auslan.cortab$Index <- apply(auslan.cortab, 1, function(x) paste(colnames(auslan.cortab)[which(x > .3 & x < .9)], collapse = ", "))
auslan.cortab.report <- auslan.cortab$Index[auslan.cortab$Index != ""]
auslan.cortest <- cortest.bartlett(auslan.occurances[, 2:(ncol(auslan.occurances))]) #if p sig, then co-correlated
## R was not square, finding R from data
auslan.pca <- PCA(auslan.occurances, quali.sup = 1, graph = F) #run the pca
auslan.pca$eig #print eigen values
barplot(auslan.pca$eig[,2], names = 1:nrow(auslan.pca$eig), xlab = "Components", ylab = "Percentage of explained variance")
However the bartlett test still finds the whole correlation frame to be significantly co-correlated. Unsure what to do with these mixed messages.
auslan.df <- auslan.occurances #rename dataframe because will be changing labelling (rownames)
rownames(auslan.df) <- auslan.occurances$NarrativeReferent
auslan.df <- subset(auslan.df, select = -c(NarrativeReferent))
par(mfrow = c(1,1))
auslan.pca2 <- PCA(auslan.df, ncp = 7, graph = T)
auslan.hcpc <- HCPC(auslan.pca2, max = 3,graph = F)
Most central (prototypical) members of each cluster
auslan.hcpc$desc.ind$para
## Cluster: 1
## jar bees jarREIN windowREIN dog
## 0.8255535 1.0604249 1.1469012 1.4797074 1.7129512
## --------------------------------------------------------
## Cluster: 2
## boyREIN boy owlREIN
## 1.701857 3.976011 4.555127
## --------------------------------------------------------
## Cluster: 3
## beesREIN dogREIN frogREIN rockNEW beesNEW
## 1.590161 1.703463 1.818677 1.870236 2.071075
The center of the cluster is 0. Referents closest to the center of the cluster are listed first.
Top semiotic behaviors in each cluster
auslan.table
## cluster coding cluster.mean overall.mean p.value
## 2 1 IN 0.357 0.258 0.025
## 1 1 wh 0.271 0.167 0.010
## 7 1 mouth 0.135 0.433 0.000
## 5 1 lex 0.130 0.336 0.000
## 3 1 fs 0.054 0.178 0.012
## 11 2 ca 0.479 0.148 0.008
## 9 2 dl 0.218 0.032 0.000
## 10 2 gest 0.002 0.000 0.002
## 8 3 mouth 0.844 0.433 0.000
## 6 3 lex 0.560 0.336 0.001
## 4 3 fs 0.372 0.178 0.001
## 12 3 lexnp 0.108 0.044 0.024
NSL <- subset(data, Language == "NSL")
NSL.occurances <- ddply(NSL, c("NarrativeReferent"),summarise,
lex = mean(lex),lexnp = mean(lexnp), fs = mean(fs), mouth = mean(mouth),
dl = mean(dl), ds = mean(ds), pt = mean(pt), is = mean(is),
gest = mean(gesture), ca = mean(ca),IN = mean(IN),
wh = mean(WH), q = mean(q))
NSL.occurances <- subset(NSL.occurances, select = -c(fs)) #remove those with low instances
NSL.cortab <- round(cor(NSL.occurances[, 2:(ncol(NSL.occurances))], method="spearman"),2)^2 #spearman correlation has no assumptions about the distribution of the data
NSL.cortab
## lex lexnp mouth dl ds pt is gest ca
## lex 1.0000 0.0121 0.5776 0.0016 0.2704 0.0036 0.0400 0.0000 0.0049
## lexnp 0.0121 1.0000 0.1521 0.0289 0.1936 0.2116 0.0036 0.0196 0.0100
## mouth 0.5776 0.1521 1.0000 0.0025 0.1369 0.0576 0.0784 0.0036 0.0016
## dl 0.0016 0.0289 0.0025 1.0000 0.0576 0.0625 0.0256 0.0081 0.3136
## ds 0.2704 0.1936 0.1369 0.0576 1.0000 0.0484 0.0081 0.0064 0.0225
## pt 0.0036 0.2116 0.0576 0.0625 0.0484 1.0000 0.0900 0.0036 0.0001
## is 0.0400 0.0036 0.0784 0.0256 0.0081 0.0900 1.0000 0.0484 0.0000
## gest 0.0000 0.0196 0.0036 0.0081 0.0064 0.0036 0.0484 1.0000 0.0100
## ca 0.0049 0.0100 0.0016 0.3136 0.0225 0.0001 0.0000 0.0100 1.0000
## IN 0.0484 0.0529 0.1681 0.0036 0.0100 0.0036 0.1521 0.0484 0.0225
## wh 0.0900 0.0676 0.1444 0.0025 0.2601 0.0900 0.0049 0.0400 0.0225
## q 0.0009 0.0001 0.0256 0.0004 0.0324 0.0001 0.0144 0.0196 0.1764
## IN wh q
## lex 0.0484 0.0900 0.0009
## lexnp 0.0529 0.0676 0.0001
## mouth 0.1681 0.1444 0.0256
## dl 0.0036 0.0025 0.0004
## ds 0.0100 0.2601 0.0324
## pt 0.0036 0.0900 0.0001
## is 0.1521 0.0049 0.0144
## gest 0.0484 0.0400 0.0196
## ca 0.0225 0.0225 0.1764
## IN 1.0000 0.0001 0.0144
## wh 0.0001 1.0000 0.0289
## q 0.0144 0.0289 1.0000
NSL.occurances2 <- subset(NSL.occurances, select = -c(NarrativeReferent))
NSL.corr <- round(cor(NSL.occurances2), 1)
ggcorrplot(NSL.corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of NSL Semiotic strategies",
ggtheme=theme_bw)
NSL.cortab <- as.data.frame(round(cor(NSL.occurances[, 2:(ncol(NSL.occurances))], method="spearman"),2)^2)
NSL.cortab$Index <- apply(NSL.cortab, 1, function(x) paste(colnames(NSL.cortab)[which(x > .3 & x < .9)], collapse = ", "))
NSL.cortab.report <- NSL.cortab$Index[NSL.cortab$Index != ""]
NSL.cortest <- cortest.bartlett(NSL.occurances[, 2:(ncol(NSL.occurances))]) #if p sig, then co-correlated
## R was not square, finding R from data
NSL.pca <- PCA(NSL.occurances, quali.sup = 1, graph = F) #run the pca
NSL.pca$eig #print eigen values
barplot(NSL.pca$eig[,2], names = 1:nrow(NSL.pca$eig), xlab = "Components", ylab = "Percentage of explained variance")
However the bartlett test still finds the whole correlation frame to be significantly co-correlated. Unsure what to do with these mixed messages.
NSL.df <- NSL.occurances #rename dataframe because will be changing labelling (rownames)
rownames(NSL.df) <- NSL.occurances$NarrativeReferent
NSL.df <- subset(NSL.df, select = -c(NarrativeReferent))
par(mfrow = c(1,1))
NSL.pca2 <- PCA(NSL.df, ncp = 6, graph = T)
NSL.hcpc <- HCPC(NSL.pca2, max = 3,graph = F)
Most central (prototypical) members of each cluster
NSL.hcpc$desc.ind$para
## Cluster: 1
## bees jarREIN deer jar dog
## 0.9618113 1.0964162 1.2447966 1.3369165 1.7729490
## --------------------------------------------------------
## Cluster: 2
## boyREIN boy dogREIN
## 0.551440 1.036889 1.081964
## --------------------------------------------------------
## Cluster: 3
## frogREIN windowNEW dogNEW owlREIN beesNEW
## 0.8143005 1.1826362 1.3241244 1.8657728 1.9044351
The center of the cluster is 0. Referents closest to the center of the cluster are listed first.
Top semiotic behaviors in each cluster
NSL.table
## cluster coding cluster.mean overall.mean p.value
## 1 1 ds 0.683 0.513 0.010
## 7 1 mouth 0.284 0.503 0.000
## 4 1 wh 0.226 0.143 0.047
## 9 1 lex 0.148 0.366 0.000
## 3 1 is 0.029 0.017 0.030
## 5 1 dl 0.000 0.003 0.035
## 11 2 ca 0.530 0.139 0.001
## 6 2 dl 0.033 0.003 0.000
## 8 3 mouth 0.927 0.503 0.000
## 10 3 lex 0.756 0.366 0.000
## 2 3 ds 0.290 0.513 0.046
# FinSL Clustering Analysis
FinSL <- subset(data, Language == "FinSL")
FinSL.occurances <- ddply(FinSL, c("NarrativeReferent"),summarise,
lex = mean(lex),lexnp = mean(lexnp), fs = mean(fs), mouth = mean(mouth),
dl = mean(dl), ds = mean(ds), pt = mean(pt), is = mean(is),
gest = mean(gesture), ca = mean(ca),IN = mean(IN),
wh = mean(WH), q = mean(q))
# FinSL.occurances <- subset(FinSL.occurances, select = -c(fs)) #remove those with low instances
FinSL.cortab <- round(cor(FinSL.occurances[, 2:(ncol(FinSL.occurances))], method="spearman"),2)^2 #spearman correlation has no assumptions about the distribution of the data
FinSL.cortab
## lex lexnp fs mouth dl ds pt is gest
## lex 1.0000 0.1521 0.0324 0.6724 0.0016 0.0064 0.0676 0.0729 0.0009
## lexnp 0.1521 1.0000 0.0324 0.0169 0.0004 0.0081 0.0025 0.0784 0.0324
## fs 0.0324 0.0324 1.0000 0.0100 0.0100 0.0100 0.0121 0.0225 0.0049
## mouth 0.6724 0.0169 0.0100 1.0000 0.0289 0.0324 0.0529 0.0625 0.0484
## dl 0.0016 0.0004 0.0100 0.0289 1.0000 0.0064 0.0169 0.0961 0.0576
## ds 0.0064 0.0081 0.0100 0.0324 0.0064 1.0000 0.1089 0.0441 0.0049
## pt 0.0676 0.0025 0.0121 0.0529 0.0169 0.1089 1.0000 0.0036 0.0196
## is 0.0729 0.0784 0.0225 0.0625 0.0961 0.0441 0.0036 1.0000 0.0081
## gest 0.0009 0.0324 0.0049 0.0484 0.0576 0.0049 0.0196 0.0081 1.0000
## ca 0.0049 0.0196 0.0169 0.0036 0.3969 0.0361 0.1024 0.0169 0.0049
## IN 0.1089 0.0000 0.0400 0.1849 0.0529 0.0676 0.0025 0.0169 0.0025
## wh 0.0961 0.0025 0.0009 0.1089 0.0009 0.1764 0.0009 0.0004 0.0169
## q 0.0484 0.0036 0.0400 0.1764 0.2116 0.0324 0.0484 0.0000 0.0324
## ca IN wh q
## lex 0.0049 0.1089 0.0961 0.0484
## lexnp 0.0196 0.0000 0.0025 0.0036
## fs 0.0169 0.0400 0.0009 0.0400
## mouth 0.0036 0.1849 0.1089 0.1764
## dl 0.3969 0.0529 0.0009 0.2116
## ds 0.0361 0.0676 0.1764 0.0324
## pt 0.1024 0.0025 0.0009 0.0484
## is 0.0169 0.0169 0.0004 0.0000
## gest 0.0049 0.0025 0.0169 0.0324
## ca 1.0000 0.0121 0.0289 0.3025
## IN 0.0121 1.0000 0.0004 0.0049
## wh 0.0289 0.0004 1.0000 0.0064
## q 0.3025 0.0049 0.0064 1.0000
FinSL.occurances2 <- subset(FinSL.occurances, select = -c(NarrativeReferent))
FinSL.corr <- round(cor(FinSL.occurances2), 1)
ggcorrplot(FinSL.corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of FinSL Semiotic strategies",
ggtheme=theme_bw)
FinSL.cortab <- as.data.frame(round(cor(FinSL.occurances[, 2:(ncol(FinSL.occurances))], method="spearman"),2)^2)
FinSL.cortab$Index <- apply(FinSL.cortab, 1, function(x) paste(colnames(FinSL.cortab)[which(x > .3 & x < .9)], collapse = ", "))
FinSL.cortab.report <- FinSL.cortab$Index[FinSL.cortab$Index != ""]
FinSL.cortest <- cortest.bartlett(FinSL.occurances[, 2:(ncol(FinSL.occurances))]) #if p sig, then co-correlated
## R was not square, finding R from data
FinSL.pca <- PCA(FinSL.occurances, quali.sup = 1, graph = F) #run the pca
FinSL.pca$eig #print eigen values
barplot(FinSL.pca$eig[,2], names = 1:nrow(FinSL.pca$eig), xlab = "Components", ylab = "Percentage of explained variance")
However the bartlett test still finds the whole correlation frame to be significantly co-correlated. Unsure what to do with these mixed messages.
FinSL.df <- FinSL.occurances #rename dataframe because will be changing labelling (rownames)
rownames(FinSL.df) <- FinSL.occurances$NarrativeReferent
FinSL.df <- subset(FinSL.df, select = -c(NarrativeReferent))
par(mfrow = c(1,1))
FinSL.pca2 <- PCA(FinSL.df, ncp = 7, graph = T)
FinSL.hcpc <- HCPC(FinSL.pca2, max = 3,graph = F)
Most central (prototypical) members of each cluster
FinSL.hcpc$desc.ind$para
## Cluster: 1
## window windowREIN hive jar jarREIN
## 1.140464 1.307465 1.368188 1.589189 1.611950
## --------------------------------------------------------
## Cluster: 2
## boy boyREIN dogREIN dog
## 0.7944186 1.4300421 1.5308766 1.7525523
## --------------------------------------------------------
## Cluster: 3
## frogREIN beesNEW windowNEW owlREIN boyNEW
## 0.6867219 1.6537180 1.6548275 1.7366753 2.3579961
The center of the cluster is 0. Referents closest to the center of the cluster are listed first.
Top semiotic behaviors in each cluster
FinSL.table
## cluster coding cluster.mean overall.mean p.value
## 4 1 ds 0.653 0.512 0.046
## 3 1 IN 0.392 0.281 0.010
## 9 1 lex 0.325 0.504 0.000
## 7 1 mouth 0.302 0.479 0.001
## 1 1 wh 0.170 0.110 0.007
## 5 1 dl 0.000 0.015 0.021
## 11 2 ca 0.626 0.149 0.000
## 6 2 dl 0.119 0.015 0.000
## 8 3 mouth 0.856 0.479 0.000
## 10 3 lex 0.850 0.504 0.000
## 2 3 wh 0.023 0.110 0.022
SSL <- subset(data, Language == "SSL")
SSL.occurances <- ddply(SSL, c("NarrativeReferent"),summarise,
lex = mean(lex),lexnp = mean(lexnp), fs = mean(fs), mouth = mean(mouth),
dl = mean(dl), ds = mean(ds), pt = mean(pt), is = mean(is),
gest = mean(gesture), ca = mean(ca),IN = mean(IN),
wh = mean(WH), q = mean(q))
SSL.occurances <- subset(SSL.occurances, select = -c(gest,q)) #remove those with low instances
SSL.cortab <- round(cor(SSL.occurances[, 2:(ncol(SSL.occurances))], method="spearman"),2)^2 #spearman correlation has no assumptions about the distribution of the data
SSL.cortab
## lex lexnp fs mouth dl ds pt is ca
## lex 1.0000 0.0121 0.0004 0.8100 0.0016 0.0196 0.0016 0.0256 0.0009
## lexnp 0.0121 1.0000 0.0256 0.1156 0.0625 0.3136 0.1089 0.0064 0.0121
## fs 0.0004 0.0256 1.0000 0.0196 0.0625 0.0484 0.1681 0.0121 0.0169
## mouth 0.8100 0.1156 0.0196 1.0000 0.0004 0.0361 0.0256 0.0361 0.0064
## dl 0.0016 0.0625 0.0625 0.0004 1.0000 0.2025 0.2304 0.0144 0.3721
## ds 0.0196 0.3136 0.0484 0.0361 0.2025 1.0000 0.0961 0.0009 0.3721
## pt 0.0016 0.1089 0.1681 0.0256 0.2304 0.0961 1.0000 0.1600 0.0841
## is 0.0256 0.0064 0.0121 0.0361 0.0144 0.0009 0.1600 1.0000 0.0256
## ca 0.0009 0.0121 0.0169 0.0064 0.3721 0.3721 0.0841 0.0256 1.0000
## IN 0.0196 0.1936 0.0361 0.0441 0.0576 0.0081 0.0361 0.0081 0.0225
## wh 0.0676 0.0324 0.0100 0.0225 0.0169 0.0121 0.0081 0.0169 0.0529
## IN wh
## lex 0.0196 0.0676
## lexnp 0.1936 0.0324
## fs 0.0361 0.0100
## mouth 0.0441 0.0225
## dl 0.0576 0.0169
## ds 0.0081 0.0121
## pt 0.0361 0.0081
## is 0.0081 0.0169
## ca 0.0225 0.0529
## IN 1.0000 0.0196
## wh 0.0196 1.0000
SSL.occurances2 <- subset(SSL.occurances, select = -c(NarrativeReferent))
SSL.corr <- round(cor(SSL.occurances2), 1)
ggcorrplot(SSL.corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of SSL Semiotic strategies",
ggtheme=theme_bw)
SSL.cortab <- as.data.frame(round(cor(SSL.occurances[, 2:(ncol(SSL.occurances))], method="spearman"),2)^2)
SSL.cortab$Index <- apply(SSL.cortab, 1, function(x) paste(colnames(SSL.cortab)[which(x > .3 & x < .9)], collapse = ", "))
SSL.cortab.report <- SSL.cortab$Index[SSL.cortab$Index != ""]
SSL.cortest <- cortest.bartlett(SSL.occurances[, 2:(ncol(SSL.occurances))]) #if p sig, then co-correlated
## R was not square, finding R from data
SSL.pca <- PCA(SSL.occurances, quali.sup = 1, graph = F) #run the pca
SSL.pca$eig #print eigen values
barplot(SSL.pca$eig[,2], names = 1:nrow(SSL.pca$eig), xlab = "Components", ylab = "Percentage of explained variance")
However the bartlett test still finds the whole correlation frame to be significantly co-correlated. Unsure what to do with these mixed messages.
SSL.df <- SSL.occurances #rename dataframe because will be changing labelling (rownames)
rownames(SSL.df) <- SSL.occurances$NarrativeReferent
SSL.df <- subset(SSL.df, select = -c(NarrativeReferent))
par(mfrow = c(1,1))
SSL.pca2 <- PCA(SSL.df, ncp = 5, graph = T)
SSL.hcpc <- HCPC(SSL.pca2, max = 3,graph = F)
Most central (prototypical) members of each cluster
SSL.hcpc$desc.ind$para
## Cluster: 1
## deerNEW windowREIN window rock jar
## 0.6799906 0.9348407 1.0278956 1.0747697 1.0785145
## --------------------------------------------------------
## Cluster: 2
## windowNEW beesREIN frogREIN bootsNEW beesNEW
## 0.9748434 0.9848163 1.0773566 1.6910901 1.8270525
## --------------------------------------------------------
## Cluster: 3
## owlREIN boyREIN dog boy owlNEW
## 1.368911 2.172187 2.185436 2.644263 3.543535
The center of the cluster is 0. Referents closest to the center of the cluster are listed first.
Top semiotic behaviors in each cluster
SSL.table
## cluster coding cluster.mean overall.mean p.value
## 3 1 ds 0.672 0.525 0.033
## 1 1 IN 0.401 0.269 0.006
## 5 1 wh 0.298 0.185 0.043
## 8 1 mouth 0.188 0.473 0.000
## 6 1 lex 0.163 0.401 0.000
## 9 2 mouth 0.933 0.473 0.000
## 7 2 lex 0.901 0.401 0.000
## 11 3 ca 0.448 0.168 0.001
## 12 3 pt 0.248 0.106 0.005
## 10 3 dl 0.193 0.058 0.001
## 4 3 ds 0.185 0.525 0.006
## 14 3 lexnp 0.172 0.053 0.036
## 13 3 fs 0.076 0.020 0.015
## 2 3 IN 0.043 0.269 0.008
ISL <- subset(data, Language == "ISL")
ISL.occurances <- ddply(ISL, c("NarrativeReferent"),summarise,
lex = mean(lex),lexnp = mean(lexnp), fs = mean(fs), mouth = mean(mouth),
dl = mean(dl), ds = mean(ds), pt = mean(pt), is = mean(is),
gest = mean(gesture), ca = mean(ca),IN = mean(IN),
wh = mean(WH), q = mean(q))
ISL.occurances <- subset(ISL.occurances, select = -c(gest,q)) #remove those with low instances
ISL.cortab <- round(cor(ISL.occurances[, 2:(ncol(ISL.occurances))], method="spearman"),2)^2 #spearman correlation has no assumptions about the distribution of the data
ISL.cortab
## lex lexnp fs mouth dl ds pt is ca
## lex 1.0000 0.0049 0.1024 0.6889 0.0036 0.0025 0.0121 0.0081 0.0064
## lexnp 0.0049 1.0000 0.0484 0.0961 0.0144 0.1681 0.0025 0.0009 0.0009
## fs 0.1024 0.0484 1.0000 0.1444 0.0256 0.1764 0.0121 0.0324 0.0081
## mouth 0.6889 0.0961 0.1444 1.0000 0.0100 0.0009 0.0100 0.0001 0.0009
## dl 0.0036 0.0144 0.0256 0.0100 1.0000 0.0064 0.0001 0.0529 0.3249
## ds 0.0025 0.1681 0.1764 0.0009 0.0064 1.0000 0.0009 0.0100 0.0121
## pt 0.0121 0.0025 0.0121 0.0100 0.0001 0.0009 1.0000 0.0064 0.0016
## is 0.0081 0.0009 0.0324 0.0001 0.0529 0.0100 0.0064 1.0000 0.0225
## ca 0.0064 0.0009 0.0081 0.0009 0.3249 0.0121 0.0016 0.0225 1.0000
## IN 0.1600 0.0025 0.0400 0.3249 0.0529 0.1156 0.0049 0.1024 0.1681
## wh 0.1089 0.0036 0.0196 0.0961 0.0484 0.0225 0.0196 0.0169 0.0016
## IN wh
## lex 0.1600 0.1089
## lexnp 0.0025 0.0036
## fs 0.0400 0.0196
## mouth 0.3249 0.0961
## dl 0.0529 0.0484
## ds 0.1156 0.0225
## pt 0.0049 0.0196
## is 0.1024 0.0169
## ca 0.1681 0.0016
## IN 1.0000 0.0064
## wh 0.0064 1.0000
ISL.occurances2 <- subset(ISL.occurances, select = -c(NarrativeReferent))
ISL.corr <- round(cor(ISL.occurances2), 1)
ggcorrplot(ISL.corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of ISL Semiotic strategies",
ggtheme=theme_bw)
ISL.cortab <- as.data.frame(round(cor(ISL.occurances[, 2:(ncol(ISL.occurances))], method="spearman"),2)^2)
ISL.cortab$Index <- apply(ISL.cortab, 1, function(x) paste(colnames(ISL.cortab)[which(x > .3 & x < .9)], collapse = ", "))
ISL.cortab.report <- ISL.cortab$Index[ISL.cortab$Index != ""]
ISL.cortest <- cortest.bartlett(ISL.occurances[, 2:(ncol(ISL.occurances))]) #if p sig, then co-correlated
## R was not square, finding R from data
ISL.pca <- PCA(ISL.occurances, quali.sup = 1, graph = F) #run the pca
ISL.pca$eig #print eigen values
barplot(ISL.pca$eig[,2], names = 1:nrow(ISL.pca$eig), xlab = "Components", ylab = "Percentage of explained variance")
However the bartlett test still finds the whole correlation frame to be significantly co-correlated. Unsure what to do with these mixed messages.
ISL.df <- ISL.occurances #rename dataframe because will be changing labelling (rownames)
rownames(ISL.df) <- ISL.occurances$NarrativeReferent
ISL.df <- subset(ISL.df, select = -c(NarrativeReferent))
par(mfrow = c(1,1))
ISL.pca2 <- PCA(ISL.df, ncp = 6, graph = T)
ISL.hcpc <- HCPC(ISL.pca2, max = 3,graph = F)
Most central (prototypical) members of each cluster
ISL.hcpc$desc.ind$para
## Cluster: 1
## frog bees dog owl windowREIN
## 1.121817 1.332203 1.485279 1.718586 1.753860
## --------------------------------------------------------
## Cluster: 2
## owlREIN boy dogREIN deerREIN boyREIN
## 1.016299 1.233968 1.617385 1.619432 2.158819
## --------------------------------------------------------
## Cluster: 3
## hiveREIN beesREIN owlNEW windowNEW rockNEW
## 1.333214 1.777564 1.788830 1.849798 1.893235
The center of the cluster is 0. Referents closest to the center of the cluster are listed first.
Top semiotic behaviors in each cluster
ISL.table
## cluster coding cluster.mean overall.mean p.value
## 1 1 IN 0.535 0.336 0.003
## 6 1 mouth 0.148 0.461 0.000
## 3 1 wh 0.121 0.062 0.021
## 8 1 lex 0.074 0.362 0.000
## 4 1 fs 0.057 0.157 0.032
## 10 2 ca 0.662 0.210 0.000
## 2 2 IN 0.113 0.336 0.025
## 12 2 dl 0.015 0.003 0.001
## 7 3 mouth 0.724 0.461 0.000
## 9 3 lex 0.621 0.362 0.000
## 5 3 fs 0.289 0.157 0.003
## 13 3 pt 0.185 0.108 0.022
## 11 3 ca 0.072 0.210 0.028
Plot distribution of number semiotic behaviors per CLU for activation and animacy.
data$c.cc <- data$component.count - mean(data$component.count) #center
data$z.cc <- data$c.cc / sd(data$component.count) #standardize
contrasts(data$Animacy) <- contr.sum(2)
# contrasts(data$Animacy)
# levels(data$Language2)
# head(data)
cc.m <- lmer(z.cc ~ Activation2 * Animacy + Language2 + (1+Animacy|Filename), data = data)
summary(cc.m)
Yes it appears these patterns are significant. Just as we found for Auslan, when we control for random variation in individuals and referents we find a cross-linguistic pattern for: