Read in data

# English table: drop the first column (presumably a row index written by the
# exporter -- TODO confirm) and tag every row with its language.
eng_raw <- read_csv("correlation-swow/correlation_full_en.csv") %>%
  select(-1) %>%
  mutate(language = "English")

# Dutch table, prepared the same way.
nl_raw <- read_csv("correlation-swow/correlation_full_nl.csv") %>%
  select(-1) %>%
  mutate(language = "Dutch")

# Dutch -> English dictionary: keep only words that have a translation,
# lower-case the translations, and collapse "to eat" to "eat" so that it
# matches the bare verb used in the word lists further down.
dutch_dict <- read_csv("../association_figure/SWOW_dutch/mmc2.csv") %>%
  select(Word, Translation) %>%
  filter(Translation != "") %>%
  mutate(
    Translation = tolower(Translation),
    Translation = replace(Translation, Translation == "to eat", "eat")
  )

Dancer

# Associates of the cue "dancer" that are compared across the two languages.
dancer_words <- c("graceful", "music", "rhythm", "dance", "waltz", "slim", "sexy")

# English: rank all associates of "dancer" by swow (num = 1 is the largest
# value), then keep only the words of interest.
original_plot <- eng_raw %>%
  filter(cue == "dancer") %>%
  arrange(desc(swow)) %>%
  mutate(num = row_number(desc(swow))) %>%
  filter(target %in% dancer_words) %>%
  as.data.frame()

# Renormalise so the retained words sum to 1 within English.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "dancer".
dancer_nl <- dutch_dict[dutch_dict$Translation == "dancer", "Word"]

# Dutch: take the associates of the translated cue and map each Dutch target
# back to English through the dictionary. `%in%` is used instead of `==`
# because the dictionary lookup returns a whole column: with zero matches `==`
# makes filter() fail on a length-0 condition, and with several matches it
# recycles element-wise and keeps the wrong rows. With exactly one match the
# result is the same as `==`.
nl_plot <- nl_raw %>%
  filter(cue %in% dancer_nl[["Word"]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% dancer_words)

# Renormalise within Dutch and expose the English translation as `target` so
# both languages share the same axis labels.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# Side-by-side horizontal bars, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("dancer")

Snack - original words

# Associates of the cue "snack" that are compared across the two languages.
snack_words <- c(
  "eat", "apple", "cheese", "hunger", "cookie", "fat", "small", "candy",
  "unhealthy", "chocolate", "pizza", "sugar", "bar", "bite", "fruit"
)

# English: rank all associates of "snack" by swow (num = 1 is the largest
# value), then keep only the words of interest.
original_plot <- eng_raw %>%
  filter(cue == "snack") %>%
  arrange(desc(swow)) %>%
  mutate(num = row_number(desc(swow))) %>%
  filter(target %in% snack_words) %>%
  as.data.frame()

# Renormalise so the retained words sum to 1 within English.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "snack".
snack_nl <- dutch_dict[dutch_dict$Translation == "snack", "Word"]

# Dutch: take the associates of the translated cue and map each Dutch target
# back to English through the dictionary. `%in%` is used instead of `==`
# because the dictionary lookup returns a whole column: with zero matches `==`
# makes filter() fail on a length-0 condition, and with several matches it
# recycles element-wise and keeps the wrong rows. With exactly one match the
# result is the same as `==`.
nl_plot <- nl_raw %>%
  filter(cue %in% snack_nl[["Word"]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% snack_words)

# Renormalise within Dutch and expose the English translation as `target` so
# both languages share the same axis labels.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# Side-by-side horizontal bars, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("snack")

The two languages don't look very different on this subset of words, so below is a different subset.

Snack - new words

# Alternative set of "snack" associates. Each word is listed once; repeating a
# word would have no effect on the `%in%` filters below.
snack_words2 <- c(
  "unhealthy", "pizza", "candy", "cheese", "apple", "sausage", "chocolate"
)

# English: rank all associates of "snack" by swow (num = 1 is the largest
# value), then keep only the words of interest.
original_plot <- eng_raw %>%
  filter(cue == "snack") %>%
  arrange(desc(swow)) %>%
  mutate(num = row_number(desc(swow))) %>%
  filter(target %in% snack_words2) %>%
  as.data.frame()

# Renormalise so the retained words sum to 1 within English.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "snack".
snack_nl <- dutch_dict[dutch_dict$Translation == "snack", "Word"]

# Dutch: take the associates of the translated cue and map each Dutch target
# back to English through the dictionary. `%in%` is used instead of `==`
# because the dictionary lookup returns a whole column: with zero matches `==`
# makes filter() fail on a length-0 condition, and with several matches it
# recycles element-wise and keeps the wrong rows. With exactly one match the
# result is the same as `==`.
nl_plot <- nl_raw %>%
  filter(cue %in% snack_nl[["Word"]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% snack_words2)

# Renormalise within Dutch and expose the English translation as `target` so
# both languages share the same axis labels.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# Side-by-side horizontal bars, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("snack")