Taxonomic language distances

Clusters
Comparing clusters
- Dendrogram comparison
- Pairwise correlations

# Pairwise language similarities from the "wiki" file, read in long format.
# The columns used below are word1, word2 and language_similarity.
LANGUAGE_PATH_WIKI <- here("data/processed/animal_distances_wiki.csv")

# Wide form: one row per word1 value, one column per word2 value.
language_wide_wiki <- read_csv(LANGUAGE_PATH_WIKI) %>%
  spread(word2, language_similarity)

language_data_wiki <- language_wide_wiki %>%
  select(-word1)

all_corrs_mat_langs_wiki <- as.matrix(language_data_wiki)
# Label each row with the word1 value it was built from. Reusing the column
# names (the word2 values) is only correct when both word sets are identical
# and sorted the same way; otherwise rows would be silently mislabelled.
rownames(all_corrs_mat_langs_wiki) <- language_wide_wiki$word1

# Back to long form (animal1, animal2, similarity), tagged with its source so
# it can be stacked with the other similarity tables.
language_long_wiki <- all_corrs_mat_langs_wiki %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "language_wiki")

# Pairwise language similarities from the "google" file, read in long format.
# The columns used below are word1, word2 and language_similarity.
LANGUAGE_PATH_GOOGLE <- here("data/processed/animal_distances_google.csv")

# Wide form: one row per word1 value, one column per word2 value.
language_wide_google <- read_csv(LANGUAGE_PATH_GOOGLE) %>%
  spread(word2, language_similarity)

language_data_google <- language_wide_google %>%
  select(-word1)

all_corrs_mat_lang_google <- as.matrix(language_data_google)
# Label each row with the word1 value it was built from. Reusing the column
# names (the word2 values) is only correct when both word sets are identical
# and sorted the same way; otherwise rows would be silently mislabelled.
rownames(all_corrs_mat_lang_google) <- language_wide_google$word1

# Back to long form (animal1, animal2, similarity), tagged with its source so
# it can be stacked with the other similarity tables.
language_long_google <- all_corrs_mat_lang_google %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "language_google")

# Taxonomy matrix: the second element of the list that readMat() returns for
# the .mat file.
TAXONOMIC_PATH <- here("data/raw/taxonomy_matrix.mat")
taxonomic_data <- readMat(TAXONOMIC_PATH)[[2]]

# Animal labels from SI fig s2; the vector is reversed before being applied
# to the matrix.
LABELS <- rev(c(
  "shark", "swan", "flamingo", "pigeon", "crow", "elephant", "mammoth",
  "sloth", "beaver", "gorilla", "bat", "rhino", "zebra", "llama", "hippo",
  "killerwhale", "dolphin", "giraffe", "sheep", "goat", "deer", "pig", "boar",
  "lion", "panther", "cheetah", "skunk", "panda", "polarbear", "grizzly"
))

# The same labels name both the rows and the columns.
dimnames(taxonomic_data) <- list(LABELS, LABELS)

# Long form (animal1, animal2, similarity), tagged with its source.
# NOTE(review): the value column is called `similarity` so it stacks with the
# language tables; whether the taxonomy values are similarities or distances
# is not visible here -- confirm against the data source.
taxonomic_long <- taxonomic_data %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "taxonomy")

Clusters

Wiki language

# Heatmap of the wiki language similarity matrix, using heatmaply's defaults.
heatmaply(all_corrs_mat_langs_wiki)

Google language

# Heatmap of the google language similarity matrix, using heatmaply's defaults.
heatmaply(all_corrs_mat_lang_google)

Taxonomy

# Heatmap of the labelled taxonomy matrix, using heatmaply's defaults.
heatmaply(taxonomic_data)

Comparing clusters

# Hierarchical clustering of each matrix: dist() is computed between the rows
# of the matrix and the result is passed to hclust(), both with their default
# settings.
lang_clust_wiki <- all_corrs_mat_langs_wiki %>% dist() %>% hclust()
lang_clust_google <- all_corrs_mat_lang_google %>% dist() %>% hclust()
taxo_clust <- taxonomic_data %>% dist() %>% hclust()

Dendrogram comparison

Wiki vs. Google

# Tanglegram comparing the wiki and google language dendrograms.
dendlist(
  as.dendrogram(lang_clust_wiki),
  as.dendrogram(lang_clust_google)
) %>%
  tanglegram(
    common_subtrees_color_branches = TRUE,
    # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
    highlight_branches_lwd = FALSE
  )

Wiki vs. taxonomy

# Tanglegram comparing the wiki language dendrogram with the taxonomy one.
dendlist(
  as.dendrogram(lang_clust_wiki),
  as.dendrogram(taxo_clust)
) %>%
  tanglegram(
    common_subtrees_color_branches = TRUE,
    # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
    highlight_branches_lwd = FALSE
  )

Google vs. taxonomy

# Tanglegram comparing the google language dendrogram with the taxonomy one.
dendlist(
  as.dendrogram(lang_clust_google),
  as.dendrogram(taxo_clust)
) %>%
  tanglegram(
    common_subtrees_color_branches = TRUE,
    # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
    highlight_branches_lwd = FALSE
  )

Pairwise correlations

# Stack the three long tables, keep each unordered animal pair once
# (animal1 < animal2 also drops the self-pairs), then spread so every
# sim_type becomes its own column.
tidy_long <- bind_rows(
  taxonomic_long,
  language_long_wiki,
  language_long_google
) %>%
  filter(animal1 < animal2) %>%
  spread(sim_type, similarity)

# Scatter plot of the google vs. wiki language values for every animal pair,
# with a linear-model fit line.
tidy_long %>%
  ggplot(aes(x = language_google, y = language_wiki)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  ggtitle("Google vs. Wiki")

# Correlation test between the same two columns (cor.test defaults).
cor.test(tidy_long$language_google, tidy_long$language_wiki)

## 
##  Pearson's product-moment correlation
## 
## data:  tidy_long$language_google and tidy_long$language_wiki
## t = 12.374, df = 433, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4381427 0.5773827
## sample estimates:
##       cor 
## 0.5111082

# Scatter plot of the taxonomy vs. wiki language values for every animal
# pair, with a linear-model fit line.
tidy_long %>%
  ggplot(aes(x = taxonomy, y = language_wiki)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  ggtitle("Wiki vs. taxonomy")

# Correlation test between the same two columns (cor.test defaults).
cor.test(tidy_long$taxonomy, tidy_long$language_wiki)

## 
##  Pearson's product-moment correlation
## 
## data:  tidy_long$taxonomy and tidy_long$language_wiki
## t = -6.3327, df = 433, p-value = 6.038e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3749030 -0.2026727
## sample estimates:
##        cor 
## -0.2911451

# Scatter plot of the taxonomy vs. google language values for every animal
# pair, with a linear-model fit line.
tidy_long %>%
  ggplot(aes(x = taxonomy, y = language_google)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  ggtitle("Google vs. taxonomy")

# Correlation test between the same two columns (cor.test defaults).
cor.test(tidy_long$taxonomy, tidy_long$language_google)

## 
##  Pearson's product-moment correlation
## 
## data:  tidy_long$taxonomy and tidy_long$language_google
## t = -2.9438, df = 433, p-value = 0.003416
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.23105451 -0.04667161
## sample estimates:
##        cor 
## -0.1400772

# Show the per-pair table as a DT datatable widget.
DT::datatable(tidy_long)

Taxonomic language distances

Molly Lewis

2019-06-05

Clusters

Wiki language

Google language

Taxonomy

Comparing clusters

Dendrogram comparison

Wiki vs. Google

Wiki vs. taxonomy

Google vs. taxonomy

Pairwise correlations