library(tidyverse)
library(here)
library(knitr)
library(broom)
library(Matrix)
library(dendextend)
library(cowplot)
# Taxonomic similarity ----

# The second element of the loaded .mat file is taken as the pairwise matrix.
TAXONOMIC_PATH <- here("data/raw/taxonomy_matrix.mat")
taxonomic_data <- R.matlab::readMat(TAXONOMIC_PATH)[[2]]

# Animal names, reversed relative to the order they are listed in here
# (from SI fig s2).
LABELS <- rev(c(
  "shark", "swan", "flamingo", "pigeon", "crow", "elephant",
  "mammoth", "sloth", "beaver", "gorilla", "bat", "rhino",
  "zebra", "llama", "hippo", "killerwhale", "dolphin", "giraffe",
  "sheep", "goat", "deer", "pig", "boar", "lion", "panther", "cheetah",
  "skunk", "panda", "polarbear", "grizzly"
))

# The same labels name both the rows and the columns.
dimnames(taxonomic_data) <- list(LABELS, LABELS)

# Long format: one row per (animal1, animal2) cell of the matrix.
taxonomic_wide <- as.data.frame(taxonomic_data)
taxonomic_wide <- rownames_to_column(taxonomic_wide, var = "animal1")
taxonomic_long <- gather(
  taxonomic_wide,
  key = "animal2",
  value = "similarity",
  -animal1
)
taxonomic_long <- mutate(taxonomic_long, sim_type = "taxonomic_similarity")
ALL COCA CLEANED
# Language (COCA, all cleaned) vs. taxonomy ----

# Long-format CSV; this block reads its word1, word2 and language_similarity
# columns.
LANGUAGE_PATH <- here(
  "data/processed/animal_distances_coca_all_cleaned_gary.csv"
)

# Wide layout: one row per word1, one column per word2.
language_wide <- read_csv(LANGUAGE_PATH) %>%
  spread(word2, language_similarity)

language_data <- language_wide %>%
  select(-word1)

all_corrs_mat_langs <- as.matrix(language_data)
# Label each row with the word1 value it was built from, rather than assuming
# the row order happens to equal the column order.
rownames(all_corrs_mat_langs) <- language_wide$word1

language_long <- all_corrs_mat_langs %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "language_coca_all")

taxo_corr <- bind_rows(taxonomic_long, language_long) %>%
  # Keep every unordered pair once and drop the diagonal.
  filter(animal1 < animal2) %>%
  # One column per similarity type, one row per animal pair.
  spread(sim_type, similarity) %>%
  mutate(participant_type = "Ground Truth") %>%
  group_by(participant_type) %>%
  nest() %>%
  ungroup() %>%
  mutate(
    # Spearman correlation between the language measure and the negated
    # taxonomic measure.
    # NOTE(review): the negation suggests the two measures run in opposite
    # directions (similarity vs. distance) - confirm against the data.
    temp = map(data, ~ tidy(cor.test(.$language_coca_all,
                                     -.$taxonomic_similarity,
                                     method = "spearman"))),
    # Number of animal pairs entering the test, as a plain integer column so
    # that only `temp` has to be unnested.
    n = map_int(data, nrow),
    dimension = "Taxonomy"
  ) %>%
  select(-data) %>%
  unnest(temp) %>%
  # Fix the column order explicitly; it otherwise depends on the tidyr version.
  select(participant_type, dimension, n, everything()) %>%
  mutate(
    # NOTE(review): 1 / sqrt(n - 3) is the standard error of a Fisher
    # z-transformed correlation; here it is applied to rho directly.
    se = 1 / sqrt(n - 3),
    estimate_se_l = estimate - se,
    estimate_se_h = estimate + se
  )

kable(taxo_corr)
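| participant_type | dimension | n | estimate | statistic | p.value | method | alternative | se | estimate_se_l | estimate_se_h |
|:---|:---|---:|---:|---:|---:|:---|:---|---:|---:|---:|
| Ground Truth | Taxonomy | 435 | 0.3168335 | 6149611 | 0 | Spearman’s rank correlation rho | two.sided | 0.0481125 | 0.268721 | 0.364946 |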
ALL COCA CLEANED ANIMAL
# Language (COCA, all cleaned, animal) vs. taxonomy ----

# Long-format CSV; this block reads its word1, word2 and language_similarity
# columns.
LANGUAGE_PATH <- here(
  "data/processed/animal_distances_coca_all_cleaned_animal_gary.csv"
)

# Wide layout: one row per word1, one column per word2.
language_wide <- read_csv(LANGUAGE_PATH) %>%
  spread(word2, language_similarity)

language_data <- language_wide %>%
  select(-word1)

all_corrs_mat_langs <- as.matrix(language_data)
# Label each row with the word1 value it was built from, rather than assuming
# the row order happens to equal the column order.
rownames(all_corrs_mat_langs) <- language_wide$word1

# NOTE(review): "language_coca_al_animal" looks like a typo for
# "language_coca_all_animal"; it is kept unchanged because code outside this
# section may refer to the label.
language_long <- all_corrs_mat_langs %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "language_coca_al_animal")

taxo_corr <- bind_rows(taxonomic_long, language_long) %>%
  # Keep every unordered pair once and drop the diagonal.
  filter(animal1 < animal2) %>%
  # One column per similarity type, one row per animal pair.
  spread(sim_type, similarity) %>%
  mutate(participant_type = "Ground Truth") %>%
  group_by(participant_type) %>%
  nest() %>%
  ungroup() %>%
  mutate(
    # Spearman correlation between the language measure and the negated
    # taxonomic measure.
    # NOTE(review): the negation suggests the two measures run in opposite
    # directions (similarity vs. distance) - confirm against the data.
    temp = map(data, ~ tidy(cor.test(.$language_coca_al_animal,
                                     -.$taxonomic_similarity,
                                     method = "spearman"))),
    # Number of animal pairs entering the test, as a plain integer column so
    # that only `temp` has to be unnested.
    n = map_int(data, nrow),
    dimension = "Taxonomy"
  ) %>%
  select(-data) %>%
  unnest(temp) %>%
  # Fix the column order explicitly; it otherwise depends on the tidyr version.
  select(participant_type, dimension, n, everything()) %>%
  mutate(
    # NOTE(review): 1 / sqrt(n - 3) is the standard error of a Fisher
    # z-transformed correlation; here it is applied to rho directly.
    se = 1 / sqrt(n - 3),
    estimate_se_l = estimate - se,
    estimate_se_h = estimate + se
  )

kable(taxo_corr)
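| participant_type | dimension | n | estimate | statistic | p.value | method | alternative | se | estimate_se_l | estimate_se_h |
|:---|:---|---:|---:|---:|---:|:---|:---|---:|---:|---:|
| Ground Truth | Taxonomy | 435 | 0.2900318 | 6390870 | 0 | Spearman’s rank correlation rho | two.sided | 0.0481125 | 0.2419193 | 0.3381444 |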