# Read every csv file under data/emd and stack the results into one data frame.
# `pattern` is a regular expression, not a shell glob, so match the literal
# ".csv" extension at the end of the file name. `full.names = TRUE` makes
# list.files() return paths that already include the directory.
d <- list.files("data/emd", pattern = "\\.csv$", full.names = TRUE) %>%
  purrr::map(read_csv) %>%
  bind_rows()

# Load the cue table and keep only the join key (cue) and conc.bin
# (presumably a concreteness bin -- confirm against the csv).
cues <- read_csv("data/extreme_cues.csv")
cues <- select(cues, cue, conc.bin)

# Attach conc.bin to each row of d by matching d$word against cues$cue;
# rows of d without a matching cue are kept and get NA.
d <- left_join(d, cues, by = c("word" = "cue"))

# Rank countries by the number of rows they appear in, counting both the
# country1 and the country2 position, and keep the 19 most frequent.
# A full join with missing counts treated as 0 is used so that a country
# occurring in only one of the two columns is still counted; with a left join
# such a country would either get an NA total or be dropped entirely.
big.countries <- full_join(count(d, country1),
                           count(d, country2),
                           by = c("country1" = "country2")) %>%
  mutate(total = coalesce(n.x, 0L) + coalesce(n.y, 0L)) %>%
  arrange(desc(total)) %>%
  slice(1:19) %>%
  rename(country = country1)

# Use the same set of countries for every word: keep only rows where both
# members of the pair are in big.countries, and drop rows with a missing dist.
d <- d %>%
  filter(
    country1 %in% big.countries$country,
    country2 %in% big.countries$country,
    !is.na(dist)
  )


# Enumerate every pair of the selected countries (r = 2, no repeats) and
# store them as a two-column data frame named country1 / country2.
# combinations() is not defined in this section (presumably
# gtools::combinations -- confirm against the library() calls).
all.combos <- combinations(n = length(big.countries$country),
                           r = 2,
                           repeats.allowed = FALSE,
                           v = big.countries$country) %>%
  as.data.frame() %>%
  rename(country1 = V1, country2 = V2)

Distance across all items

# get movers as matrix
# get_movers_dist_mat() is defined outside this section; presumably it returns
# one country-by-country distance matrix per item -- TODO confirm.
all_movers <- get_movers_dist_mat(d, all.combos)

# take the mean movers distances across items: stack the per-item results
# into an array and average each [row, column] cell, ignoring NAs
all.means <- apply(simplify2array(all_movers), c(1, 2), mean, na.rm = TRUE)

# plot
# NOTE(review): dist() computes distances between the rows of all.means; if
# all.means is already a pairwise distance matrix, as.dist() may be what is
# intended -- confirm.
ggdendrogram(hclust(dist(all.means)), size = 2) +
  ggtitle("all")

Distance across abstract items

# get movers as matrix, restricted to items with conc.bin == 1 (the subset
# plotted as "abstract" below)
# get_movers_dist_mat() is defined outside this section; presumably it returns
# one country-by-country distance matrix per item -- TODO confirm.
all_movers.low <- get_movers_dist_mat(filter(d, conc.bin == 1), all.combos)

# take the mean movers distances across items: stack the per-item results
# into an array and average each [row, column] cell, ignoring NAs
all.means.low <- apply(simplify2array(all_movers.low), c(1, 2), mean,
                       na.rm = TRUE)

# plot
# NOTE(review): dist() computes distances between the rows of all.means.low;
# if it is already a pairwise distance matrix, as.dist() may be what is
# intended -- confirm.
ggdendrogram(hclust(dist(all.means.low)), size = 2) +
  ggtitle("abstract")

# summary statistics: mean and variance of the row-to-row distances, and the
# mean of the averaged matrix itself
mean(dist(all.means.low), na.rm = TRUE)
## [1] 0.8886237
mean(all.means.low, na.rm = TRUE)
## [1] 0.5819567
var(dist(all.means.low), na.rm = TRUE)
## [1] 0.495297

Distance across concrete items

# get movers as matrix, restricted to items with conc.bin == 6 (the subset
# plotted as "concrete" below)
# get_movers_dist_mat() is defined outside this section; presumably it returns
# one country-by-country distance matrix per item -- TODO confirm.
all_movers.high <- get_movers_dist_mat(filter(d, conc.bin == 6), all.combos)

# take the mean movers distances across items: stack the per-item results
# into an array and average each [row, column] cell, ignoring NAs
all.means.high <- apply(simplify2array(all_movers.high), c(1, 2), mean,
                        na.rm = TRUE)

# plot
# NOTE(review): dist() computes distances between the rows of all.means.high;
# if it is already a pairwise distance matrix, as.dist() may be what is
# intended -- confirm.
ggdendrogram(hclust(dist(all.means.high)), size = 2) +
  ggtitle("concrete")

# summary statistics: mean and variance of the row-to-row distances, and the
# mean of the averaged matrix itself
mean(dist(all.means.high), na.rm = TRUE)
## [1] 0.9100153
mean(all.means.high, na.rm = TRUE)
## [1] 0.5813365
var(dist(all.means.high), na.rm = TRUE)
## [1] 0.5203369

```