COLOR_PATH <- here("data/keb_reanalysis/animal_color_distances_language_kid.csv")
TEXTURE_PATH <- here("data/keb_reanalysis/animal_texture_distances_language_kid.csv")
SHAPE_PATH <- here("data/keb_reanalysis/animal_shape_distances_language_kid.csv")

HUMAN_DATA <- here("data/keb_reanalysis/tidy_human_data.csv")

human_data <- read_csv(HUMAN_DATA)

color_data <- read_csv(COLOR_PATH)
texture_data <- read_csv(TEXTURE_PATH)
shape_data <- read_csv(SHAPE_PATH)

Pairwise Correlations

In the Main Text, we report correlations between the pairwise animals distances for each of the three dimensions (language vs. human/ground truth). These are presented more fully below.

### Shape-texture-color plot ###
LANG_ANIMAL_DISTANCE_COLOR <- here("data/keb_reanalysis/animal_color_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_SHAPE<-  here("data/keb_reanalysis/animal_shape_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_TEXTURE <- here("data/keb_reanalysis/animal_texture_distances_language_kid.csv")

# human
TIDY_HUMAN_PATH <- here("data/keb_reanalysis/tidy_human_data.csv") 
language_data <- read_csv(LANG_ANIMAL_DISTANCE_COLOR) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_SHAPE), by  = c("animal1", "animal2")) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_TEXTURE),by  = c("animal1", "animal2"))  %>%
  mutate_if(is.numeric, ~-.x )

human_data <- read_csv(TIDY_HUMAN_PATH)  

full_sim_data_wide2 <-  full_join(language_data, human_data,
                                  by = c("animal1", "animal2")) %>%
  spread(similarity_type, human_similarity) %>%
  filter(animal1 < animal2) 

language_long <- full_sim_data_wide2 %>%
  select(contains("animal"), contains("language")) %>%
  distinct() %>%
  gather(similarity_type, language_similarity, -animal1, -animal2) %>%
  rowwise() %>%
  mutate(similarity_type = str_split(similarity_type, "dist_")[[1]][2])
  
human_long <- full_sim_data_wide2 %>%
  select(contains("animal"), contains("human"), participant_type) %>%
  gather(similarity_type, human_similarity, -animal1, -animal2, -participant_type) %>%
  rowwise() %>%
  mutate(similarity_type = str_split(similarity_type, "similarity_")[[1]][2],
         similarity_type = case_when(similarity_type == "skin" ~ "texture", 
                                     TRUE ~ similarity_type)) %>%
  filter(similarity_type %in% c("color", "shape", "texture")) 
  
long_df <- full_join(language_long, human_long)

ggplot(long_df, aes(x = language_similarity, y = human_similarity, color = participant_type))+
  geom_point(alpha = .4) + 
  ggtitle("Predicting Human Similarity") +
  ylab("human similarity (KEB card sorting task)") +
  xlab("language similarity (cosine distance)") +
  facet_wrap(~ similarity_type, scales = "free_x") +
  geom_smooth(method = "lm")

# taxonomic
TAXONOMIC_PATH <- here("data/keb_reanalysis/animal_distances_taxonomic.csv")
taxonomic_long <- read_csv(TAXONOMIC_PATH) 

LANGUAGE_PATH_WIKI <- here("data/keb_reanalysis/animal_distances_kid.csv")
language_data_wiki <- read_csv(LANGUAGE_PATH_WIKI) %>%
  spread(word2, language_similarity) %>%
  select(-word1)

all_corrs_mat_langs_wiki <- as.matrix(language_data_wiki)
rownames(all_corrs_mat_langs_wiki) <- colnames(language_data_wiki)
language_long_wiki <- all_corrs_mat_langs_wiki %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1)  %>%
  mutate(sim_type = "lang_wiki_similarity")
taxo_full <- bind_rows(taxonomic_long, language_long_wiki) %>%
  filter(animal1 < animal2) %>%
  spread(sim_type, similarity)  %>%
  mutate(participant_type = "Ground Truth")  %>%
  rename(taxonomic_similarity = taxonomy)

ggplot(taxo_full, aes(x = lang_wiki_similarity, y = -taxonomic_similarity))+
  geom_point(alpha = .4) + 
  ggtitle("Predicting Taxonomic Similarity") +
  ylab("taxonomic similarity (as reported in KEB)") +
  xlab("language similarity (cosine distance)") +
  geom_smooth(method = "lm")

For consistency, taxonomic distances are reported here in terms of similarity (1 - evolutionary distance).

dimension_corr <- long_df %>%
  group_by(participant_type, similarity_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity,
                                          .$human_similarity, method = "spearman"))),
         n = map(data, nrow)) %>%
  select(-data) %>%
  unnest() 

taxo_corr <- taxo_full %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$lang_wiki_similarity,
                                          -.$taxonomic_similarity, method = "spearman"))),
         n = map(data, nrow),
         similarity_type = "taxonomy")  %>%
  select(-data) %>%
  unnest() 

cor_df <- dimension_corr %>%
  bind_rows(taxo_corr) %>%
  ungroup() %>%
  select(-method, -alternative, -statistic) %>%
  rename(rho = estimate) %>%
  mutate(similarity_type = fct_relevel(similarity_type, "taxonomy", "shape", "texture"),
         participant_type = str_to_title(participant_type),
         fisher_transformed_rho = fisherz(rho))  %>%
  arrange(similarity_type) 

kable(cor_df, digits = 5)
similarity_type participant_type rho p.value n fisher_transformed_rho
taxonomy Ground Truth 0.16683 0.02919 435 0.16840
shape Blind 0.05304 0.49085 435 0.05309
shape Sighted 0.01831 0.81215 435 0.01831
texture Blind 0.07049 0.35955 435 0.07061
texture Sighted 0.07096 0.35641 435 0.07107
color Blind 0.23627 0.00186 435 0.24082
color Sighted -0.03977 0.60550 435 -0.03980