library(tidyverse)
library(here)
library(knitr)
library(broom)
library(Matrix)
library(dendextend)
library(cowplot)


### Shape-texture-color plot ###
LANG_ANIMAL_DISTANCE_COLOR <- here("data/processed/animal_color_distances_language_coca.csv")
LANG_ANIMAL_DISTANCE_SHAPE<- here("data/processed/animal_shape_distances_language_coca.csv")
LANG_ANIMAL_DISTANCE_TEXTURE <- here("data/processed/animal_texture_distances_language_coca.csv")

TIDY_HUMAN_PATH <- here("data/processed/tidy_human_data.csv") 
language_data <- read_csv(LANG_ANIMAL_DISTANCE_COLOR) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_SHAPE), by  = c("animal1", "animal2")) %>%
  left_join(read_csv(LANG_ANIMAL_DISTANCE_TEXTURE),by  = c("animal1", "animal2"))  %>%
  mutate_if(is.numeric, ~-.x )

human_data <- read_csv(TIDY_HUMAN_PATH) 
full_sim_data <- full_join(language_data, human_data, by = c("animal1", "animal2"))   

human_data_wide <- human_data %>%
  unite("measure", c("participant_type", "similarity_type")) %>%
  spread(measure, human_similarity)

full_sim_data_wide2 <-  full_join(language_data, human_data,
                                  by = c("animal1", "animal2"))    %>%
  spread(similarity_type, human_similarity) %>%
  filter(animal1 < animal2)


color_cors <- full_sim_data_wide2 %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity_simple_dist_color,
                                          .$human_similarity_color, method = "spearman"))),
         n = map(data, nrow),
         dimension = "Color") %>%
  select(-data) %>%
  unnest() 

texture_cors <- full_sim_data_wide2 %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity_simple_dist_texture,
                                          .$human_similarity_skin, method = "spearman"))),
         n = map(data, nrow),
         dimension = "Skin Texture") %>%
  select(-data) %>%
  unnest() 


shape_cors <- full_sim_data_wide2 %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$language_similarity_simple_dist_shape,
                                          .$human_similarity_shape, method = "spearman"))),
         n = map(data, nrow),
         dimension = "Shape") %>%
  select(-data) %>%
  unnest()

TAXONOMIC_PATH <- here("data/raw/taxonomy_matrix.mat")
taxonomic_data <- R.matlab::readMat(TAXONOMIC_PATH)[[2]]  
LABELS <- c("shark", "swan", "flamingo", "pigeon", "crow", "elephant", 
            "mammoth", "sloth", "beaver", "gorilla", "bat", "rhino", 
            "zebra", "llama", "hippo", "killerwhale", "dolphin", "giraffe",
            "sheep", "goat", "deer", "pig", "boar", "lion", "panther", "cheetah",
            "skunk", "panda", "polarbear", "grizzly") %>% rev() # from SI fig s2
colnames(taxonomic_data) <- LABELS
rownames(taxonomic_data) <- LABELS
taxonomic_long <- taxonomic_data %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1) %>%
  mutate(sim_type = "taxonomic_similarity")
LANGUAGE_PATH_WIKI <- here("data/processed/animal_distances_coca.csv")
language_data_wiki <- read_csv(LANGUAGE_PATH_WIKI) %>%
  spread(word2, language_similarity) %>%
  select(-word1)
all_corrs_mat_langs_wiki <- as.matrix(language_data_wiki)
rownames(all_corrs_mat_langs_wiki) <- colnames(language_data_wiki)
language_long_wiki <- all_corrs_mat_langs_wiki %>%
  as.data.frame() %>%
  rownames_to_column("animal1") %>%
  gather("animal2", "similarity", -animal1)  %>%
  mutate(sim_type = "lang_wiki_similarity")

taxo_corr <- bind_rows(taxonomic_long, language_long_wiki) %>%
  filter(animal1 < animal2) %>%
  spread(sim_type, similarity)  %>%
  mutate(participant_type = "Ground Truth") %>%
  group_by(participant_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ tidy(cor.test(.$lang_wiki_similarity,
                                          -.$taxonomic_similarity, method = "spearman"))),
         n = map(data, nrow),
         dimension = "Taxonomy")  %>%
  select(-data) %>%
  unnest() 

cor_df <- color_cors %>%
  bind_rows(texture_cors) %>%
  bind_rows(shape_cors) %>%
  bind_rows(taxo_corr) %>%
  select(-method, -alternative, -statistic) %>%
  mutate(se = 1/sqrt(n-3),
         estimate_se_l = estimate - se,
         estimate_se_h = estimate + se,
         dimension = fct_relevel(dimension, "Taxonomy", "Shape", "Skin Texture"),
         participant_type = str_to_title(participant_type)) %>%
  rowwise() %>%
  mutate(sig = case_when(p.value < .01 ~ "**",
                         p.value < .05 ~ "*",
                         TRUE ~ ""))


ggplot(cor_df, aes(x = fct_rev(participant_type), y = estimate, fill = participant_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(~dimension, drop = T, scales="free_x",space = "free_x") +
  xlab("Language as predictor of...") +
  geom_text(aes(y = estimate + .07, label = sig), size = 6) +
  geom_linerange(aes(ymin = estimate_se_l, ymax = estimate_se_h)) +
  theme_classic(base_size = 13) +
  scale_fill_manual(values = c( "#0345E1", "yellow","#DB3A26")) +
  scale_y_continuous(
    expand = expand_scale(mult = c(0, 0.05)),
    name = "Fisher's Z-transformed rho",
    limits = c(0, .42)) +
  theme(legend.position = "none")

kable(cor_df)
participant_type dimension n estimate p.value se estimate_se_l estimate_se_h sig
Blind Color 435 0.1137762 0.0269720 0.0481125 0.0656636 0.1618887 *
Sighted Color 435 0.0770375 0.1349032 0.0481125 0.0289250 0.1251501
Blind Skin Texture 435 0.0853810 0.0974129 0.0481125 0.0372685 0.1334936
Sighted Skin Texture 435 0.0236991 0.6460171 0.0481125 -0.0244134 0.0718117
Blind Shape 435 0.0885266 0.0856455 0.0481125 0.0404141 0.1366391
Sighted Shape 435 0.0556794 0.2802382 0.0481125 0.0075669 0.1037919
Ground Truth Taxonomy 435 0.1974317 0.0001116 0.0481125 0.1493192 0.2455442 **