COLOR_PATH <- here("data/keb_reanalysis/animal_color_distances_language_kid.csv")
TEXTURE_PATH <- here("data/keb_reanalysis/animal_texture_distances_language_kid.csv")
SHAPE_PATH <- here("data/keb_reanalysis/animal_shape_distances_language_kid.csv")
HUMAN_DATA <- here("data/keb_reanalysis/tidy_human_data.csv")
human_data <- read_csv(HUMAN_DATA)
color_data <- read_csv(COLOR_PATH)
texture_data <- read_csv(TEXTURE_PATH)
shape_data <- read_csv(SHAPE_PATH)
In the Main Text, we report correlations between the pairwise animals distances for each of the three dimensions (language vs. human/ground truth). These are presented more fully below.
### Shape-texture-color plot ###
LANG_ANIMAL_DISTANCE_COLOR <- here("data/keb_reanalysis/animal_color_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_SHAPE<- here("data/keb_reanalysis/animal_shape_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_TEXTURE <- here("data/keb_reanalysis/animal_texture_distances_language_kid.csv")
# human
TIDY_HUMAN_PATH <- here("data/keb_reanalysis/tidy_human_data.csv")
language_data <- read_csv(LANG_ANIMAL_DISTANCE_COLOR) %>%
left_join(read_csv(LANG_ANIMAL_DISTANCE_SHAPE), by = c("animal1", "animal2")) %>%
left_join(read_csv(LANG_ANIMAL_DISTANCE_TEXTURE),by = c("animal1", "animal2")) %>%
mutate_if(is.numeric, ~-.x )
human_data <- read_csv(TIDY_HUMAN_PATH)
full_sim_data_wide2 <- full_join(language_data, human_data,
by = c("animal1", "animal2")) %>%
spread(similarity_type, human_similarity) %>%
filter(animal1 < animal2)
language_long <- full_sim_data_wide2 %>%
select(contains("animal"), contains("language")) %>%
distinct() %>%
gather(similarity_type, language_similarity, -animal1, -animal2) %>%
rowwise() %>%
mutate(similarity_type = str_split(similarity_type, "dist_")[[1]][2])
human_long <- full_sim_data_wide2 %>%
select(contains("animal"), contains("human"), participant_type) %>%
gather(similarity_type, human_similarity, -animal1, -animal2, -participant_type) %>%
rowwise() %>%
mutate(similarity_type = str_split(similarity_type, "similarity_")[[1]][2],
similarity_type = case_when(similarity_type == "skin" ~ "texture",
TRUE ~ similarity_type)) %>%
filter(similarity_type %in% c("color", "shape", "texture"))
long_df <- full_join(language_long, human_long)
ggplot(long_df, aes(x = language_similarity, y = human_similarity, color = participant_type))+
geom_point(alpha = .4) +
ggtitle("Predicting Human Similarity") +
ylab("human similarity (KEB card sorting task)") +
xlab("language similarity (cosine distance)") +
facet_wrap(~ similarity_type, scales = "free_x") +
geom_smooth(method = "lm")
# taxonomic
TAXONOMIC_PATH <- here("data/keb_reanalysis/animal_distances_taxonomic.csv")
taxonomic_long <- read_csv(TAXONOMIC_PATH)
LANGUAGE_PATH_WIKI <- here("data/keb_reanalysis/animal_distances_kid.csv")
language_data_wiki <- read_csv(LANGUAGE_PATH_WIKI) %>%
spread(word2, language_similarity) %>%
select(-word1)
all_corrs_mat_langs_wiki <- as.matrix(language_data_wiki)
rownames(all_corrs_mat_langs_wiki) <- colnames(language_data_wiki)
language_long_wiki <- all_corrs_mat_langs_wiki %>%
as.data.frame() %>%
rownames_to_column("animal1") %>%
gather("animal2", "similarity", -animal1) %>%
mutate(sim_type = "lang_wiki_similarity")
taxo_full <- bind_rows(taxonomic_long, language_long_wiki) %>%
filter(animal1 < animal2) %>%
spread(sim_type, similarity) %>%
mutate(participant_type = "Ground Truth") %>%
rename(taxonomic_similarity = taxonomy)
ggplot(taxo_full, aes(x = lang_wiki_similarity, y = -taxonomic_similarity))+
geom_point(alpha = .4) +
ggtitle("Predicting Taxonomic Similarity") +
ylab("taxonomic similarity (as reported in KEB)") +
xlab("language similarity (cosine distance)") +
geom_smooth(method = "lm")
For consistency, taxonomic distances are reported here in terms of similarity (1 - evolutionary distance).
dimension_corr <- long_df %>%
group_by(participant_type, similarity_type) %>%
nest() %>%
mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity,
.$human_similarity, method = "spearman"))),
n = map(data, nrow)) %>%
select(-data) %>%
unnest()
taxo_corr <- taxo_full %>%
group_by(participant_type) %>%
nest() %>%
mutate(temp = map(data, ~ tidy(cor.test(.$lang_wiki_similarity,
-.$taxonomic_similarity, method = "spearman"))),
n = map(data, nrow),
similarity_type = "taxonomy") %>%
select(-data) %>%
unnest()
cor_df <- dimension_corr %>%
bind_rows(taxo_corr) %>%
ungroup() %>%
select(-method, -alternative, -statistic) %>%
rename(rho = estimate) %>%
mutate(similarity_type = fct_relevel(similarity_type, "taxonomy", "shape", "texture"),
participant_type = str_to_title(participant_type),
fisher_transformed_rho = fisherz(rho)) %>%
arrange(similarity_type)
kable(cor_df, digits = 5)
| similarity_type | participant_type | rho | p.value | n | fisher_transformed_rho |
|---|---|---|---|---|---|
| taxonomy | Ground Truth | 0.16683 | 0.02919 | 435 | 0.16840 |
| shape | Blind | 0.05304 | 0.49085 | 435 | 0.05309 |
| shape | Sighted | 0.01831 | 0.81215 | 435 | 0.01831 |
| texture | Blind | 0.07049 | 0.35955 | 435 | 0.07061 |
| texture | Sighted | 0.07096 | 0.35641 | 435 | 0.07107 |
| color | Blind | 0.23627 | 0.00186 | 435 | 0.24082 |
| color | Sighted | -0.03977 | 0.60550 | 435 | -0.03980 |