Read in data
# Load the English and Dutch correlation tables. The first column of each CSV
# is dropped by position, and every row is tagged with its language.
eng_raw <- read_csv("correlation-swow/correlation_full_en.csv") %>%
  select(-1) %>%
  mutate(language = "English")

nl_raw <- read_csv("correlation-swow/correlation_full_nl.csv") %>%
  select(-1) %>%
  mutate(language = "Dutch")

# Dutch -> English dictionary: keep only rows that have a translation,
# lower-case the translations, and rewrite "to eat" as "eat".
dutch_dict <- read_csv("../association_figure/SWOW_dutch/mmc2.csv") %>%
  select(Word, Translation) %>%
  filter(Translation != "") %>%
  mutate(
    Translation = tolower(Translation),
    Translation = replace(Translation, Translation == "to eat", "eat")
  )
# Compare English and Dutch SWOW association strengths for the cue "dancer",
# restricted to a hand-picked set of target words.
dancer_words <- c(
  "graceful", "music", "rhythm", "dance", "waltz", "slim", "sexy"
)

# English: rank every target of the cue by descending swow (`num`), then keep
# only the selected targets.
original_plot <- eng_raw %>%
  filter(cue == "dancer") %>%
  arrange(-swow) %>%
  mutate(num = row_number(-swow)) %>%
  filter(target %in% dancer_words) %>%
  as.data.frame()

# Rescale so the selected targets sum to 1 within the language.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "dancer".
dancer_nl <- dutch_dict[dutch_dict$Translation == "dancer", "Word"]
if (length(dancer_nl[[1]]) == 0) {
  stop("No Dutch translation found for cue 'dancer'", call. = FALSE)
}

# Dutch: `%in%` rather than `==`, because the dictionary lookup can return
# more than one Dutch word and `==` would silently recycle that vector
# against the cue column. Targets are translated to English via the
# dictionary before filtering on the selected words.
nl_plot <- nl_raw %>%
  filter(cue %in% dancer_nl[[1]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% dancer_words)

# Same rescaling as for English; the English translation replaces the Dutch
# target so both languages share one axis.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# One horizontal bar per target word, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("dancer")
# Compare English and Dutch SWOW association strengths for the cue "snack",
# restricted to a hand-picked set of target words.
snack_words <- c(
  "eat", "apple", "cheese", "hunger", "cookie", "fat", "small", "candy",
  "unhealthy", "chocolate", "pizza", "sugar", "bar", "bite", "fruit"
)

# English: rank every target of the cue by descending swow (`num`), then keep
# only the selected targets.
original_plot <- eng_raw %>%
  filter(cue == "snack") %>%
  arrange(-swow) %>%
  mutate(num = row_number(-swow)) %>%
  filter(target %in% snack_words) %>%
  as.data.frame()

# Rescale so the selected targets sum to 1 within the language.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "snack".
snack_nl <- dutch_dict[dutch_dict$Translation == "snack", "Word"]
if (length(snack_nl[[1]]) == 0) {
  stop("No Dutch translation found for cue 'snack'", call. = FALSE)
}

# Dutch: `%in%` rather than `==`, because the dictionary lookup can return
# more than one Dutch word and `==` would silently recycle that vector
# against the cue column. Targets are translated to English via the
# dictionary before filtering on the selected words.
nl_plot <- nl_raw %>%
  filter(cue %in% snack_nl[[1]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% snack_words)

# Same rescaling as for English; the English translation replaces the Dutch
# target so both languages share one axis.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# One horizontal bar per target word, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("snack")
The languages don't really look that different on this subset of words. Below is a different subset.
# Second comparison for the cue "snack", using a different hand-picked set of
# target words. Each target is listed once; membership tests with `%in%` are
# unaffected by repeats.
snack_words2 <- c(
  "unhealthy", "pizza", "candy", "cheese", "apple", "sausage", "chocolate"
)

# English: rank every target of the cue by descending swow (`num`), then keep
# only the selected targets.
original_plot <- eng_raw %>%
  filter(cue == "snack") %>%
  arrange(-swow) %>%
  mutate(num = row_number(-swow)) %>%
  filter(target %in% snack_words2) %>%
  as.data.frame()

# Rescale so the selected targets sum to 1 within the language.
eng_plot_data <- original_plot %>%
  mutate(swow_normalized = swow / sum(swow))

# Dutch cue word(s) whose English translation is "snack".
snack_nl <- dutch_dict[dutch_dict$Translation == "snack", "Word"]
if (length(snack_nl[[1]]) == 0) {
  stop("No Dutch translation found for cue 'snack'", call. = FALSE)
}

# Dutch: `%in%` rather than `==`, because the dictionary lookup can return
# more than one Dutch word and `==` would silently recycle that vector
# against the cue column. Targets are translated to English via the
# dictionary before filtering on the selected words.
nl_plot <- nl_raw %>%
  filter(cue %in% snack_nl[[1]]) %>%
  left_join(dutch_dict, by = c("target" = "Word")) %>%
  filter(Translation %in% snack_words2)

# Same rescaling as for English; the English translation replaces the Dutch
# target so both languages share one axis.
nl_plot_data <- nl_plot %>%
  mutate(swow_normalized = swow / sum(swow)) %>%
  select(-target) %>%
  dplyr::rename(target = Translation)

# One horizontal bar per target word, one facet per language.
eng_plot_data %>%
  bind_rows(nl_plot_data) %>%
  ggplot(aes(x = target, y = swow_normalized, fill = target)) +
  geom_col() +
  facet_grid(~language) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none") +
  ylab("swow conditional probabilities (?)") +
  xlab("word association") +
  ggtitle("snack")