Explore pairwise country similarities

ITEM <- "bread"

Here we’re just looking at one item: bread

Read in drawing pairwise-sims file.

if (ITEM == "tree"){ # this is just because within is in a separate file for tree - will fix this
  tree <- read_feather("../../data/keras_similarities/pairwise_country/tree_all_sims.txt")
  tree_within <- read_feather("../../data/keras_similarities/pairwise_country/tree_all_sims_within.txt")
  
  d <- bind_rows(tree, tree_within) %>%
    mutate(key_id_1 = as.character(key_id_1),
           key_id_2 = as.character(key_id_2))
  
} else {
  d <- read_feather(paste0("../../data/keras_similarities/pairwise_country/", ITEM , "_all_sims.txt"))
}

Number of unique participants per each country (sampled 50).(Note that for even smaller countries, some duplicates)

d_big = d %>%
  group_by(country_code_1, key_id_1)%>%
  slice(1) %>%
  ungroup() %>%
  count(country_code_1)%>%
  arrange(-n)%>%
  as.data.frame() 
#kable(d_big)

Remove NAs- very few, these come from 8 drawings (not sure why)

d_clean <- d %>%
  filter(!is.na(cosine)) %>%
  filter(country_code_1 %in% d_big$country_code_1 & country_code_2 %in% d_big$country_code_1)

Get country-wise similarity ratios

within_item_cosines <- d_clean %>%
  filter(country_code_1 == country_code_2) %>%
  filter(key_id_1 != key_id_2) %>%
  rename(key_id = key_id_1) %>%
  group_by(key_id) %>%
  summarize(within_cosine = mean(cosine)) 

across_item_cosines <- d_clean %>%
  filter(key_id_1 != key_id_2) %>%
  filter(country_code_1 != country_code_2) 

all_across_cosines <- across_item_cosines %>%
  bind_rows(across_item_cosines %>%  # get compliment
              rename(country_code_1 = country_code_2,
                     country_code_2 = country_code_1,
                     key_id_1 = key_id_2,
                     key_id_2 = key_id_1)) %>%
  rename(key_id = key_id_1) %>%
  group_by(country_code_1, country_code_2, key_id) %>%
  summarize(across_cosine = mean(cosine))

pairwise_ratios <- all_across_cosines %>% 
  left_join(within_item_cosines, by = "key_id") %>%
  mutate(ratio = across_cosine/within_cosine) %>%
  group_by(country_code_1, country_code_2) %>%
  summarize(mean_ratio = mean(ratio))

Plot heatmap by continent

# get contintents
pairwise_ratios_with_continents <- pairwise_ratios %>%
  rowwise() %>%
  mutate(country_name_1 = as.factor(countrycode(country_code_1, "iso2c","country.name")),
         country_name_2 = as.factor(countrycode(country_code_2, "iso2c","country.name")),
         continent_name_1 = countrycode(country_code_1, 'iso2c', 'continent'),
         continent_name_2 = countrycode(country_code_2, 'iso2c', 'continent')) %>%
  ungroup() %>%
  mutate(cont_order_1 = as.factor(continent_name_1) %>% as.numeric,
         cont_order_2 = as.factor(continent_name_2) %>% as.numeric) %>%
  select(-continent_name_1, -continent_name_2) %>%
  mutate(country_name_1 = fct_reorder(country_name_1, cont_order_1), 
         country_name_2 = fct_reorder(country_name_2, cont_order_2)) 

# plot heatmap
pairwise_ratios_with_continents %>%
  ggplot(aes(x = country_name_1, 
             y = country_name_2)) +
  geom_raster(aes(fill = mean_ratio)) + 
  scale_fill_continuous(low = "#ffffcc", high = "#800026") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, 
                                   hjust = 1, vjust = 1, size = 6),
        axis.text.y = element_text(size = 6),
        axis.title = element_blank())

Plot ratio denodogram

all_countries <- unique(pairwise_ratios_with_continents$country_name_1)
NUM_COUNTRIES <- length(all_countries)

m = matrix(nrow = NUM_COUNTRIES, ncol = NUM_COUNTRIES)

for (i in 1:dim(pairwise_ratios_with_continents)[1]){ 
  sim = unlist(pairwise_ratios_with_continents[i, "mean_ratio"])
  x = which(all_countries == unlist(pairwise_ratios_with_continents[i, "country_name_1"]))
  y = which(all_countries == unlist(pairwise_ratios_with_continents[i, "country_name_2"]))
  m[x,y] <- sim
}

colnames(m) = all_countries
rownames(m) = all_countries

# plot
ggdendrogram(hclust(dist(m)), size = 2)

write_csv(pairwise_ratios_with_continents, 
          paste0("../../data/keras_similarities/pairwise_country/", ITEM, "_sim_ratios.csv"))

Explore pairwise country similarities

item-level measure

Molly Lewis

2017-08-10

Get country-wise similarity ratios

Plot heatmap by continent

Plot ratio denodogram