#### LOAD DATA AND COMPUTE CLUSTERS ####
#Initial code loading data and organizing clusters
# Input locations, resolved through here().
LANG_ANIMAL_DISTANCE_COLOR <- here(
  "data/processed/animal_color_distances_language_wiki.csv"
)
LANG_ANIMAL_DISTANCE_SHAPE <- here(
  "data/processed/animal_shape_distances_language_wiki.csv"
)
LANG_ANIMAL_DISTANCE_TEXTURE <- here(
  "data/processed/animal_texture_distances_language_wiki.csv"
)
TIDY_HUMAN_PATH <- here("data/processed/tidy_human_data.csv")

# Language-derived distances: read the colour, shape and texture tables (in
# that order) and left-join them pairwise on the animal pair, then discard
# every column whose name contains "PCA".
language_data <- Reduce(
  function(joined, next_table) {
    left_join(joined, next_table, by = c("animal1", "animal2"))
  },
  lapply(
    c(
      LANG_ANIMAL_DISTANCE_COLOR,
      LANG_ANIMAL_DISTANCE_SHAPE,
      LANG_ANIMAL_DISTANCE_TEXTURE
    ),
    read_csv
  )
)
language_data <- select(language_data, -contains("PCA"))

# Human judgements: one column per participant_type/similarity_type
# combination, filled with the human_similarity values.
human_data <- read_csv(TIDY_HUMAN_PATH)
human_data_wide <- spread(
  unite(human_data, "measure", c("participant_type", "similarity_type")),
  measure,
  human_similarity
)

# The human data calls the texture dimension "skin"; rename those two columns
# so they line up with the "texture" naming used for the language data.
# Columns that are absent are left untouched.
names(human_data_wide) <- replace(
  names(human_data_wide),
  names(human_data_wide) == "blind_human_similarity_skin",
  "blind_human_similarity_texture"
)
names(human_data_wide) <- replace(
  names(human_data_wide),
  names(human_data_wide) == "sighted_human_similarity_skin",
  "sighted_human_similarity_texture"
)
#### Re-compute clusters and organize into a list ####
# cluster_list[[knowledge_type]][[data_source]] holds one hclust() result per
# combination of knowledge type (color/shape/texture) and data source
# (sighted/blind/language).
cluster_list <- list()
data_sources <- c("sighted", "blind", "language")
knowledge_types <- c("color", "shape", "texture")

for (knowledge_type in knowledge_types) {
  for (data_source in data_sources) {
    if (data_source == "language") {
      # Language columns are named "language_similarity_simple_dist_<type>".
      # NOTE(review): convert_similarity_to_distance() is defined outside this
      # section; the reverse_dist/human_data flags are passed through as-is.
      cluster_list[[knowledge_type]][[data_source]] <- language_data %>%
        convert_similarity_to_distance(
          paste0(data_source, "_similarity_simple_dist_", knowledge_type),
          reverse_dist = FALSE,
          human_data = FALSE
        ) %>%
        as.dist() %>%
        hclust()
    } else {
      # Human columns are named "<source>_human_similarity_<type>"; the helper
      # is called with its default flags here.
      cluster_list[[knowledge_type]][[data_source]] <- human_data_wide %>%
        convert_similarity_to_distance(
          paste0(data_source, "_human_similarity_", knowledge_type)
        ) %>%
        as.dist() %>%
        hclust()
    }
  }
}
## Read in the pre-computed cluster similarity values
cluster_similarity_values <- read_csv(
  here("scripts/clustering/cluster_similarity_values.csv")
)

# Collapse to one row per (knowledge_type, unordered pair of data sources).
# The pair key is the two source names sorted and joined with "_", so that
# (a, b) and (b, a) share a key; every remaining column is reduced with min().
cluster_similarity <- cluster_similarity_values %>%
  mutate(
    data_source = vapply(
      seq_along(data_source_1),
      function(row_index) {
        source_pair <- c(
          as.character(data_source_1[row_index]),
          as.character(data_source_2[row_index])
        )
        paste(sort(source_pair), collapse = "_")
      },
      character(1)
    )
  ) %>%
  select(-data_source_1, -data_source_2) %>%
  group_by(knowledge_type, data_source) %>%
  summarise_all(min)
# Draw the rotated dendrogram stored in
# cluster_list[[knowledge_type]][[data_source]].
#
# knowledge_type: "shape", "texture" or "color".
# data_source:    "language", "blind" or "sighted".
# Returns the ggplot object, titled "<Source> Similarity <Type>" with both
# words capitalised. It is returned visibly, so a top-level call prints it.
plot_cluster_dendrogram <- function(knowledge_type, data_source) {
  capitalize <- function(word) {
    paste0(toupper(substring(word, 1, 1)), substring(word, 2))
  }
  ggdendro::ggdendrogram(
    cluster_list[[knowledge_type]][[data_source]],
    rotate = TRUE
  ) +
    ggtitle(
      paste(capitalize(data_source), "Similarity", capitalize(knowledge_type))
    )
}

# Shape
plot_cluster_dendrogram("shape", "language")
plot_cluster_dendrogram("shape", "blind")
plot_cluster_dendrogram("shape", "sighted")

# Texture
plot_cluster_dendrogram("texture", "language")
plot_cluster_dendrogram("texture", "blind")
plot_cluster_dendrogram("texture", "sighted")

# Color
plot_cluster_dendrogram("color", "language")
plot_cluster_dendrogram("color", "blind")
plot_cluster_dendrogram("color", "sighted")
# Pair the language dendrogram for `knowledge_type` (first element) with the
# dendrogram of `comparison_source` ("blind" or "sighted") for the same
# knowledge type (second element). Returns a dendlist.
make_dendrogram_pair <- function(knowledge_type, comparison_source) {
  dendlist(
    as.dendrogram(cluster_list[[knowledge_type]][["language"]]),
    as.dendrogram(cluster_list[[knowledge_type]][[comparison_source]])
  )
}

# Untangle a dendrogram pair with the "step2side" method and draw the
# tanglegram, with "LANGUAGE" as the left title and `right_title` on the
# right. Returns whatever tanglegram() returns, which is later passed to
# entanglement().
draw_language_tanglegram <- function(dend_pair, right_title) {
  dend_pair %>%
    untangle(method = "step2side") %>%
    tanglegram(
      common_subtrees_color_branches = TRUE,
      highlight_branches_lwd = FALSE,
      margin_inner = 5.7,
      main_left = "LANGUAGE",
      main_right = right_title
    )
}

# Format the entanglement of an untangled pair, rounded to two decimals, as
# "Language to <comparison_label> Entanglement:  <value>".
entanglement_label <- function(untangled, comparison_label) {
  paste(
    paste0("Language to ", comparison_label, " Entanglement: "),
    round(entanglement(untangled), 2)
  )
}

# Shape: language vs. blind
dends <- make_dendrogram_pair("shape", "blind")
x <- draw_language_tanglegram(dends, "BLIND")
entanglement_label(x, "Blind")
## [1] "Language to Blind Entanglement: 0.21"

# Shape: language vs. sighted
dends <- make_dendrogram_pair("shape", "sighted")
x <- draw_language_tanglegram(dends, "SIGHTED")
entanglement_label(x, "Sighted")
## [1] "Language to Sighted Entanglement: 0.12"

# Texture: language vs. blind
dends <- make_dendrogram_pair("texture", "blind")
x <- draw_language_tanglegram(dends, "BLIND")
entanglement_label(x, "Blind")
## [1] "Language to Blind Entanglement: 0.14"

# Texture: language vs. sighted
dends <- make_dendrogram_pair("texture", "sighted")
x <- draw_language_tanglegram(dends, "SIGHTED")
entanglement_label(x, "Sighted")
## [1] "Language to Sighted Entanglement: 0.17"

# Color: language vs. blind
dends <- make_dendrogram_pair("color", "blind")
x <- draw_language_tanglegram(dends, "BLIND")
entanglement_label(x, "Blind")
## [1] "Language to Blind Entanglement: 0.36"

# Color: language vs. sighted
dends <- make_dendrogram_pair("color", "sighted")
x <- draw_language_tanglegram(dends, "SIGHTED")
entanglement_label(x, "Sighted")
## [1] "Language to Sighted Entanglement: 0.25"
# Bar chart of the step2side entanglement for each pairwise clustering
# comparison, one facet per knowledge type, with the y axis fixed to [0, 1].
ggplot(
  cluster_similarity,
  aes(x = data_source, y = entangle_step2side, fill = data_source)
) +
  geom_col() +
  facet_wrap(~knowledge_type) +
  scale_x_discrete(
    limits = c("blind_sighted", "language_sighted", "blind_language"),
    labels = c(
      "Blind \nto Sighted",
      "Language \nto Sighted",
      "Language \nto Blind"
    )
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "Clustering Comparison", y = "Entanglement") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
# Long-format FM-index Z-scores: one row per knowledge type, comparison and
# number of clusters (5, 10, 15, 20, parsed from the "Z_FM_<k>" column names).
cluster_similarity_Z_FM <- cluster_similarity %>%
  select(knowledge_type, data_source, Z_FM_5, Z_FM_10, Z_FM_15, Z_FM_20) %>%
  gather(cluster_num, Z_FM, Z_FM_5:Z_FM_20) %>%
  mutate(cluster_num = as.numeric(str_remove(cluster_num, "Z_FM_")))

# Build the FM Z-score bar panel for one knowledge type.
#
# knowledge: value of knowledge_type to plot; also used as the plot title.
# x_name:    x-axis title ("" leaves it blank).
# y_name:    y-axis title ("" leaves it blank).
# Returns a ggplot faceted by number of clusters.
#
# The axis titles are set on the scales themselves: a scale `name` takes
# precedence over xlab()/ylab(), so this is the label that actually renders.
plot_z_fm_panel <- function(knowledge, x_name, y_name) {
  panel_data <- subset(cluster_similarity_Z_FM, knowledge_type == knowledge)
  ggplot(panel_data, aes(data_source, Z_FM, fill = data_source)) +
    geom_bar(stat = "identity") +
    facet_wrap(~cluster_num) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8)
    ) +
    scale_x_discrete(
      name = x_name,
      limits = c("blind_sighted", "language_sighted", "blind_language"),
      labels = c(
        "Blind \nto Sighted",
        "Language \nto Sighted",
        "Language \nto Blind"
      )
    ) +
    scale_fill_brewer(palette = "Set1") +
    # Dashed reference line at z = 1.645.
    # NOTE(review): presumably the one-tailed p < .05 threshold - confirm.
    geom_hline(yintercept = 1.645, linetype = "dashed") +
    scale_y_continuous(name = y_name, limits = c(-1, 16)) +
    ggtitle(knowledge)
}

# Only the left panel carries the y-axis title and only the middle panel the
# x-axis title, so the three-column grid is labelled once per axis.
title <- "shape"
p1 <- plot_z_fm_panel(title, x_name = "", y_name = "FM Index (Z-Scores)")
title <- "texture"
p2 <- plot_z_fm_panel(title, x_name = "Clustering Comparison", y_name = "")
title <- "color"
p3 <- plot_z_fm_panel(title, x_name = "", y_name = "")

plot_grid(p1, p2, p3, ncol = 3)
# Long-format adjusted Rand indices: one row per knowledge type, comparison
# and number of clusters (5, 10, 15, 20, parsed from the "adjustedRand_<k>"
# column names).
cluster_similarity_adjustedRand <- cluster_similarity %>%
  select(
    knowledge_type, data_source,
    adjustedRand_5, adjustedRand_10, adjustedRand_15, adjustedRand_20
  ) %>%
  gather(cluster_num, adjustedRand, adjustedRand_5:adjustedRand_20) %>%
  mutate(cluster_num = as.numeric(str_remove(cluster_num, "adjustedRand_")))

# Build the adjusted Rand index bar panel for one knowledge type.
#
# knowledge: value of knowledge_type to plot; also used as the plot title.
# x_name:    x-axis title ("" leaves it blank).
# y_name:    y-axis title ("" leaves it blank).
# Returns a ggplot faceted by number of clusters.
#
# The axis titles are set on the scales themselves: a scale `name` takes
# precedence over xlab()/ylab(), so this is the label that actually renders.
plot_adjusted_rand_panel <- function(knowledge, x_name, y_name) {
  panel_data <- subset(
    cluster_similarity_adjustedRand,
    knowledge_type == knowledge
  )
  ggplot(panel_data, aes(data_source, adjustedRand, fill = data_source)) +
    geom_bar(stat = "identity") +
    facet_wrap(~cluster_num) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8)
    ) +
    scale_x_discrete(
      name = x_name,
      limits = c("blind_sighted", "language_sighted", "blind_language"),
      labels = c(
        "Blind \nto Sighted",
        "Language \nto Sighted",
        "Language \nto Blind"
      )
    ) +
    scale_fill_brewer(palette = "Set1") +
    # Solid reference line at an adjusted Rand index of zero.
    geom_hline(yintercept = 0) +
    scale_y_continuous(name = y_name, limits = c(-0.1, 1)) +
    ggtitle(knowledge)
}

# Only the left panel carries the y-axis title and only the middle panel the
# x-axis title, so the three-column grid is labelled once per axis.
title <- "shape"
p1 <- plot_adjusted_rand_panel(
  title,
  x_name = "",
  y_name = "Adjusted Rand Index"
)
title <- "texture"
p2 <- plot_adjusted_rand_panel(
  title,
  x_name = "Clustering Comparison",
  y_name = ""
)
title <- "color"
p3 <- plot_adjusted_rand_panel(title, x_name = "", y_name = "")

plot_grid(p1, p2, p3, ncol = 3)