KEB re-analysis with kidbook models

Pairwise Correlations

In the Main Text, we report correlations between the pairwise animals distances for each of the three dimensions (language vs. human/ground truth). These are presented more fully below.

### Shape-texture-color plot ###
LANG_ANIMAL_DISTANCE_COLOR <- here("data/keb_reanalysis/animal_color_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_SHAPE<-  here("data/keb_reanalysis/animal_shape_distances_language_kid.csv")
LANG_ANIMAL_DISTANCE_TEXTURE <- here("data/keb_reanalysis/animal_texture_distances_language_kid.csv")

# human
TIDY_HUMAN_PATH <- here("data/keb_reanalysis/tidy_human_data.csv") 
language_data <- read_csv(LANG_ANIMAL_DISTANCE_COLOR) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_SHAPE), by  = c("animal1", "animal2")) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_TEXTURE),by  = c("animal1", "animal2"))  %>%
  mutate_if(is.numeric, ~-.x )

human_data <- read_csv(TIDY_HUMAN_PATH)  

full_sim_data_wide2 <-  full_join(language_data, human_data,
                                  by = c("animal1", "animal2")) %>%
  spread(similarity_type, human_similarity) %>%
  filter(animal1 < animal2) 

language_long <- full_sim_data_wide2 %>%
  select(contains("animal"), contains("language")) %>%
  distinct() %>%
  gather(similarity_type, language_similarity, -animal1, -animal2) %>%
  rowwise() %>%
  mutate(similarity_type = str_split(similarity_type, "dist_")[[1]][2])
  
human_long <- full_sim_data_wide2 %>%
  select(contains("animal"), contains("human"), participant_type) %>%
  gather(similarity_type, human_similarity, -animal1, -animal2, -participant_type) %>%
  rowwise() %>%
  mutate(similarity_type = str_split(similarity_type, "similarity_")[[1]][2],
         similarity_type = case_when(similarity_type == "skin" ~ "texture", 
                                     TRUE ~ similarity_type)) %>%
  filter(similarity_type %in% c("color", "shape", "texture")) 
  
long_df <- full_join(language_long, human_long)

ggplot(long_df, aes(x = language_similarity, y = human_similarity, color = participant_type))+
  geom_point(alpha = .4) + 
  ggtitle("Predicting Human Similarity") +
  ylab("human similarity (KEB card sorting task)") +
  xlab("language similarity (cosine distance)") +
  facet_wrap(~ similarity_type, scales = "free_x") +
  geom_smooth(method = "lm")

# taxonomic
TAXONOMIC_PATH <- here("data/keb_reanalysis/animal_distances_taxonomic.csv")
taxonomic_long <- read_csv(TAXONOMIC_PATH) 

LANGUAGE_PATH_WIKI <- here("data/keb_reanalysis/animal_distances_kid.csv")
language_data_wiki <- read_csv(LANGUAGE_PATH_WIKI) %>%
  spread(word2, language_similarity) %>%
  select(-word1)

all_corrs_mat_langs_wiki <- as.matrix(language_data_wiki)
rownames(all_corrs_mat_langs_wiki) <- colnames(language_data_wiki)
language_long_wiki <- all_corrs_mat_langs_wiki %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1)  %>%
  mutate(sim_type = "lang_wiki_similarity")

taxo_full <- bind_rows(taxonomic_long, language_long_wiki) %>%
  filter(animal1 < animal2) %>%
  spread(sim_type, similarity)  %>%
  mutate(participant_type = "Ground Truth")  %>%
  rename(taxonomic_similarity = taxonomy)

ggplot(taxo_full, aes(x = lang_wiki_similarity, y = -taxonomic_similarity))+
  geom_point(alpha = .4) + 
  ggtitle("Predicting Taxonomic Similarity") +
  ylab("taxonomic similarity (as reported in KEB)") +
  xlab("language similarity (cosine distance)") +
  geom_smooth(method = "lm")

For consistency, taxonomic distances are reported here in terms of similarity (1 - evolutionary distance).

dimension_corr <- long_df %>%
  group_by(participant_type, similarity_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity,
                                          .$human_similarity, method = "spearman"))),
         n = map(data, nrow)) %>%
  select(-data) %>%
  unnest() 

taxo_corr <- taxo_full %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$lang_wiki_similarity,
                                          -.$taxonomic_similarity, method = "spearman"))),
         n = map(data, nrow),
         similarity_type = "taxonomy")  %>%
  select(-data) %>%
  unnest() 

cor_df <- dimension_corr %>%
  bind_rows(taxo_corr) %>%
  ungroup() %>%
  select(-method, -alternative, -statistic) %>%
  rename(rho = estimate) %>%
  mutate(similarity_type = fct_relevel(similarity_type, "taxonomy", "shape", "texture"),
         participant_type = str_to_title(participant_type),
         fisher_transformed_rho = fisherz(rho))  %>%
  arrange(similarity_type) 

kable(cor_df, digits = 5)

similarity_type	participant_type	rho	p.value	n	fisher_transformed_rho
taxonomy	Ground Truth	0.16683	0.02919	435	0.16840
shape	Blind	0.05304	0.49085	435	0.05309
shape	Sighted	0.01831	0.81215	435	0.01831
texture	Blind	0.07049	0.35955	435	0.07061
texture	Sighted	0.07096	0.35641	435	0.07107
color	Blind	0.23627	0.00186	435	0.24082
color	Sighted	-0.03977	0.60550	435	-0.03980

KEB re-analysis with kidbook models

Molly Lewis

2020-06-18

Pairwise Correlations