Comparing word scores by geographical region to connect to racialized
social systems and geopolitics.
Top keywords by
country
# Identify the five countries that appear on the most publications.
# After splitting AU_CO on ";", a row that lists the same country more than
# once (e.g. several co-authors from one country) would be counted once per
# entry, which ranks countries by author entries rather than by publications.
# Tagging each row with an id and keeping distinct (row, country) pairs makes
# every publication count at most once per country.
# NOTE(review): assumes M4_tags has one row per publication -- confirm.
top_countries <- M4_tags %>%
  mutate(.doc_id = row_number()) %>%
  separate_rows(AU_CO, sep = ";") %>%
  # A missing or empty country is not a country and must not take a top-5 slot
  filter(!is.na(AU_CO), AU_CO != "") %>%
  distinct(.doc_id, AU_CO) %>%
  count(AU_CO, sort = TRUE) %>%
  slice_head(n = 5) %>%
  pull(AU_CO)
# Find the top 5 author keywords (DE) for each of the top countries.
#
# Fixes relative to the naive split-and-count:
#   * Keyword pieces are trimmed after splitting on ";", otherwise
#     " INDIGENOUS PEOPLES" and "INDIGENOUS PEOPLES" are ranked as two
#     different keywords.
#   * Missing / blank keywords are dropped before ranking so that <NA> cannot
#     show up as a country's "top keyword".
#   * Each (row, country, keyword) combination is counted once, so a single
#     record that lists the same country several times does not multiply its
#     keyword counts.
#     NOTE(review): assumes M4_tags has one row per publication -- confirm.
#
# The result is still grouped by AU_CO and sorted by country, then by
# descending keyword_count. slice_max() keeps ties, so a country can have
# more than 5 rows when several keywords share the cut-off count.
keyword_by_country_top <- M4_tags %>%
  mutate(.doc_id = row_number()) %>%
  separate_rows(AU_CO, sep = ";") %>%
  filter(AU_CO %in% top_countries) %>%
  separate_rows(DE, sep = ";") %>%
  mutate(DE = trimws(DE)) %>%
  filter(!is.na(DE), DE != "", DE != "NA") %>%
  distinct(.doc_id, AU_CO, DE) %>%
  # count() returns an ungrouped table, so no `.groups` message is emitted
  count(AU_CO, DE, name = "keyword_count") %>%
  group_by(AU_CO) %>%
  slice_max(keyword_count, n = 5) %>%
  arrange(AU_CO, desc(keyword_count))
## `summarise()` has grouped output by 'AU_CO'. You can override using the `.groups` argument.
# Preview the first 17 rows of the per-country keyword ranking
keyword_by_country_top %>%
  head(n = 17)
## # A tibble: 17 × 3
## # Groups: AU_CO [3]
## AU_CO DE keyword_count
## <chr> <chr> <int>
## 1 AUSTRALIA " WELLBEING" 14
## 2 AUSTRALIA " COVID-19" 10
## 3 AUSTRALIA " INDIGENOUS HEALTH" 10
## 4 AUSTRALIA " PUBLIC HEALTH" 10
## 5 AUSTRALIA "ABORIGINAL HEALTH" 10
## 6 BRAZIL " RACE" 7
## 7 BRAZIL " ILLEGAL FOSSIL TRADE" 6
## 8 BRAZIL " LATIN AMERICA" 6
## 9 BRAZIL " PALAEONTOLOGICAL HERITAGE" 6
## 10 BRAZIL " PARACHUTE SCIENCE" 6
## 11 BRAZIL " RESEARCH ETHICS" 6
## 12 BRAZIL "SCIENTIFIC COLONIALISM" 6
## 13 CANADA <NA> 15
## 14 CANADA " INDIGENOUS METHODOLOGIES" 9
## 15 CANADA " INDIGENOUS PEOPLES" 9
## 16 CANADA " DECOLONIZATION" 8
## 17 CANADA "INDIGENOUS PEOPLES" 8
# Preview the last 17 rows of the per-country keyword ranking
keyword_by_country_top %>%
  tail(n = 17)
## # A tibble: 17 × 3
## # Groups: AU_CO [3]
## AU_CO DE keyword_count
## <chr> <chr> <int>
## 1 CANADA " INDIGENOUS METHODOLOGIES" 9
## 2 CANADA " INDIGENOUS PEOPLES" 9
## 3 CANADA " DECOLONIZATION" 8
## 4 CANADA "INDIGENOUS PEOPLES" 8
## 5 UNITED KINGDOM <NA> 10
## 6 UNITED KINGDOM " NATIONALISM" 9
## 7 UNITED KINGDOM " AUTHORITARIANISM" 7
## 8 UNITED KINGDOM " IMMIGRATION" 7
## 9 UNITED KINGDOM " PANDEMIC" 7
## 10 UNITED KINGDOM " SOCIAL DOMINANCE ORIENTATION" 7
## 11 UNITED KINGDOM " THREAT" 7
## 12 UNITED KINGDOM "COVID-19" 7
## 13 USA <NA> 127
## 14 USA " RACISM" 54
## 15 USA " STEM" 54
## 16 USA " RACE" 42
## 17 USA " HIGHER EDUCATION" 41
Plots of top
keywords by country
# Extract the top 10 author keywords (DE) per top country for plotting.
#
# Cleaning happens BEFORE ranking:
#   * Keyword pieces are trimmed after splitting on ";", so the same keyword
#     with and without a leading space is counted as one keyword.
#   * Missing, blank and literal "NA" keywords are removed first; filtering
#     them only after slice_max() would let them occupy one of the ten slots
#     and leave fewer real keywords per country.
#   * Each (row, country, keyword) combination is counted once, so a record
#     that lists the same country several times does not inflate the counts.
#     NOTE(review): assumes M4_tags has one row per publication -- confirm.
#
# slice_max() keeps ties, so a country can contribute more than 10 rows when
# several keywords share the cut-off count. The result is ungrouped.
keywords_by_country <- M4_tags %>%
  mutate(.doc_id = row_number()) %>%
  separate_rows(AU_CO, sep = ";") %>%
  filter(AU_CO %in% top_countries) %>%
  separate_rows(DE, sep = ";") %>%
  mutate(DE = trimws(DE)) %>%
  filter(!is.na(DE), DE != "NA", DE != "") %>%
  distinct(.doc_id, AU_CO, DE) %>%
  count(AU_CO, DE, name = "keyword_count") %>%
  group_by(AU_CO) %>%
  slice_max(keyword_count, n = 10) %>% # Top 10 keywords per country
  ungroup()
# Build a horizontal bar chart of keyword counts for one country.
#
# country_data: data frame with columns AU_CO, DE and keyword_count.
# Returns a ggplot object; bars are ordered by keyword_count and each bar
# carries its count as a white label centred inside it.
plot_keywords_by_country <- function(country_data) {
  chart_title <- paste("Top Keywords for", unique(country_data$AU_CO))
  bar_mapping <- aes(x = reorder(DE, keyword_count), y = keyword_count)

  # Small fonts keep long keyword labels readable on the flipped axis
  text_sizes <- theme(
    axis.text.y = element_text(size = 8),
    plot.title = element_text(size = 10, face = "bold")
  )

  # Count labels, placed at the midpoint of each bar
  count_labels <- geom_text(
    aes(label = keyword_count),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 3
  )

  ggplot(country_data, bar_mapping) +
    geom_col(fill = "steelblue") +
    count_labels +
    coord_flip() +
    labs(title = chart_title, x = "Keywords", y = "Keyword Count") +
    theme_minimal() +
    text_sizes
}
# Build one chart per country: cut the keyword table into per-country pieces
# and pass each piece to the plotting helper.
per_country_data <- group_split(keywords_by_country, AU_CO)
country_plots <- lapply(per_country_data, plot_keywords_by_country)
# Render the charts one after another (print() is required inside a loop)
for (idx in seq_along(country_plots)) {
  print(country_plots[[idx]])
}





Notions by
Country
We first define the patterns, or notions, to be analyzed.
# Notion patterns to search for, one entry per notion.
# NOTE(review): the trailing "*" is presumably a glob-style wildcard; the
# matching code is not shown here -- confirm. "anti*" is broad: the rendered
# output includes an "anticipated" column alongside the "anti-..." terms.
patterns <- c(
  "rac*",
  "nationali*",
  "xeno*",
  "colon*",
  "anti*",
  "white supremacy"
)
We then clean and duplicate the data for plotting.
## Warning: tolower argument is not used.
## Warning: There was 1 warning in `summarize()`.
## ℹ In argument: `across(everything(), sum, na.rm = TRUE)`.
## ℹ In group 1: `AU_CO = "AUSTRALIA"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
## Warning: tolower argument is not used.
## # A tibble: 17 × 13
## AU_CO colonialism racism race nationalism racial `anti-white` `anti-immigrant` colonial `anti-asian` racialized nationalisms
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUSTRA… 0 1 0 0 0 1 0 0 0 0 0
## 2 BANGLA… 0 1 0 0 0 0 0 0 0 0 0
## 3 BRAZIL 0 0 0 0 0 0 0 1 0 0 0
## 4 CANADA 0 0 1 0 0 0 0 1 0 0 0
## 5 CHINA 0 0 0 0 0 0 0 0 0 0 0
## 6 COLOMB… 1 1 0 0 0 0 0 0 0 0 0
## 7 CROATIA 0 0 0 0 0 0 0 0 0 0 0
## 8 ICELAND 0 0 0 0 0 0 0 0 0 0 0
## 9 INDIA 0 0 0 0 0 0 0 0 0 0 1
## 10 ISRAEL 0 0 0 0 0 0 0 0 0 0 0
## 11 MEXICO 0 0 0 0 0 0 0 0 0 0 0
## 12 NORWAY 0 0 0 0 0 0 0 0 0 0 0
## 13 POLAND 0 0 0 0 1 0 0 0 0 0 0
## 14 SOUTH … 0 0 0 0 0 0 0 0 0 0 0
## 15 UNITED… 0 0 0 1 0 0 1 0 0 1 0
## 16 USA 0 5 8 1 6 0 0 0 2 2 0
## 17 VIETNAM 0 0 0 0 0 0 0 0 0 0 0
## # ℹ 1 more variable: `anti-blackness` <dbl>
## # A tibble: 17 × 38
## AU_CO `anti-black` racism racialized antiracist colonialism colonization colonizers race nationalism racial anticipated
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUSTRALIA 0 8 0 0 0 0 0 0 0 0 0
## 2 BANGLADESH 0 10 0 0 0 0 0 0 0 0 0
## 3 BRAZIL 0 0 0 0 3 0 0 0 0 0 0
## 4 CANADA 1 11 1 0 12 1 0 5 0 6 0
## 5 CHINA 0 1 0 0 0 0 0 0 0 0 0
## 6 COLOMBIA 0 1 1 0 3 1 1 0 0 0 0
## 7 CROATIA 0 0 0 0 0 0 0 0 1 0 0
## 8 ICELAND 0 0 0 0 0 0 0 0 1 0 0
## 9 INDIA 0 0 0 0 0 0 0 0 3 0 0
## 10 ISRAEL 0 1 0 0 0 0 0 0 0 1 0
## 11 MEXICO 0 1 0 0 0 0 0 0 0 0 0
## 12 NORWAY 0 2 0 0 0 0 0 0 0 0 0
## 13 POLAND 0 0 0 0 1 2 0 0 0 1 0
## 14 SOUTH AFRICA 0 1 0 0 3 0 0 0 0 0 0
## 15 UNITED KING… 0 4 0 0 2 0 0 0 1 3 0
## 16 USA 9 61 15 1 5 2 0 31 4 39 2
## 17 VIETNAM 0 0 0 0 1 0 0 0 0 0 0
## # ℹ 26 more variables: racially <dbl>, racialization <dbl>, `anti-asian` <dbl>, racist <dbl>, `anti-racism` <dbl>,
## # colonial <dbl>, racismin <dbl>, `anti-white` <dbl>, colonisation <dbl>, `anti-immigrant` <dbl>, `anti-access` <dbl>,
## # racism.intervention <dbl>, colonizing <dbl>, races <dbl>, nationalities <dbl>, colonies <dbl>, `anti-racist` <dbl>,
## # `race-neutral` <dbl>, `anti-blackness` <dbl>, antiasian <dbl>, `race-ethnicity` <dbl>, nationalist <dbl>, `anti-parsi` <dbl>,
## # nationalisms <dbl>, `anti-dalhousie` <dbl>, `racism-related` <dbl>
## tibble [17 × 13] (S3: tbl_df/tbl/data.frame)
## $ AU_CO : chr [1:17] "AUSTRALIA" "BANGLADESH" "BRAZIL" "CANADA" ...
## $ colonialism : num [1:17] 0 0 0 0 0 1 0 0 0 0 ...
## $ racism : num [1:17] 1 1 0 0 0 1 0 0 0 0 ...
## $ race : num [1:17] 0 0 0 1 0 0 0 0 0 0 ...
## $ nationalism : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racial : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-white : num [1:17] 1 0 0 0 0 0 0 0 0 0 ...
## $ anti-immigrant: num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ colonial : num [1:17] 0 0 1 1 0 0 0 0 0 0 ...
## $ anti-asian : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racialized : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ nationalisms : num [1:17] 0 0 0 0 0 0 0 0 1 0 ...
## $ anti-blackness: num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## tibble [17 × 38] (S3: tbl_df/tbl/data.frame)
## $ AU_CO : chr [1:17] "AUSTRALIA" "BANGLADESH" "BRAZIL" "CANADA" ...
## $ anti-black : num [1:17] 0 0 0 1 0 0 0 0 0 0 ...
## $ racism : num [1:17] 8 10 0 11 1 1 0 0 0 1 ...
## $ racialized : num [1:17] 0 0 0 1 0 1 0 0 0 0 ...
## $ antiracist : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ colonialism : num [1:17] 0 0 3 12 0 3 0 0 0 0 ...
## $ colonization : num [1:17] 0 0 0 1 0 1 0 0 0 0 ...
## $ colonizers : num [1:17] 0 0 0 0 0 1 0 0 0 0 ...
## $ race : num [1:17] 0 0 0 5 0 0 0 0 0 0 ...
## $ nationalism : num [1:17] 0 0 0 0 0 0 1 1 3 0 ...
## $ racial : num [1:17] 0 0 0 6 0 0 0 0 0 1 ...
## $ anticipated : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racially : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racialization : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-asian : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racist : num [1:17] 1 0 0 1 0 0 0 0 0 0 ...
## $ anti-racism : num [1:17] 0 0 0 1 0 0 0 0 0 0 ...
## $ colonial : num [1:17] 0 0 0 1 0 0 0 0 0 0 ...
## $ racismin : num [1:17] 1 0 0 0 0 0 0 0 0 0 ...
## $ anti-white : num [1:17] 2 0 0 0 0 0 0 0 0 0 ...
## $ colonisation : num [1:17] 1 0 0 0 0 0 0 0 0 0 ...
## $ anti-immigrant : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-access : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racism.intervention: num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ colonizing : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ races : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ nationalities : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ colonies : num [1:17] 0 0 0 1 0 0 0 0 0 0 ...
## $ anti-racist : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ race-neutral : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-blackness : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ antiasian : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ race-ethnicity : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ nationalist : num [1:17] 0 0 0 0 0 0 0 0 2 0 ...
## $ anti-parsi : num [1:17] 0 0 0 0 0 0 0 0 1 0 ...
## $ nationalisms : num [1:17] 0 0 0 0 0 0 0 0 1 0 ...
## $ anti-dalhousie : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
## $ racism-related : num [1:17] 0 0 0 0 0 0 0 0 0 0 ...
Abstract patterns
(top 10 notions)
Top 10 patterns in abstract.
# Horizontal bar chart of `top_patterns`, bars ordered by total_count
top_patterns %>%
  ggplot(aes(x = reorder(pattern, total_count), y = total_count)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Top 10 Patterns in Abstracts", x = "Pattern", y = "Total Count") +
  theme_minimal()

Pattern occurrence
by country
The pattern occurrence graphic illustrates the frequency of specific
keywords related to race and social dynamics in the abstracts of
academic publications, categorized by country. Each bar represents a
keyword (such as “racism,” “colonialism,” or “nationalism”) and its
corresponding count in the abstracts, allowing for a visual comparison
of how often these themes are discussed across different countries. The
bars are color-coded to indicate the country of the authors, making it
easy to identify which countries are engaging with particular
issues.
# Pattern counts per country: one dodged bar per (pattern, country) pair,
# filled by the authors' country, drawn horizontally
abstract_summary %>%
  ggplot(aes(x = pattern, y = count, fill = AU_CO)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(title = "Pattern Occurrences by Country (AB)", x = "Pattern", y = "Count") +
  theme_minimal()

# Same dodged chart as a labelled variant: a bar gets its count printed only
# when count > 3; smaller bars get an empty label
count_labels <- geom_text(
  aes(label = ifelse(count > 3, count, "")),
  position = position_dodge(width = 0.9), # line labels up with dodged bars
  vjust = -0.5,                           # text offset
  size = 3                                # label font size
)

abstract_summary %>%
  ggplot(aes(x = pattern, y = count, fill = AU_CO)) +
  geom_col(position = "dodge") +
  count_labels +
  coord_flip() +
  # Title intentionally blank ("Pattern Occurrences by Country in Abstracts")
  labs(title = "", x = "Pattern", y = "Count") +
  theme_minimal()

Final modified graphic showing relative proportions for patterns whose
count is one or more.
# Dodged chart of pattern proportions per country; a bar is labelled with its
# proportion (rounded to 2 decimals) only when the proportion exceeds 0.05
proportion_labels <- geom_text(
  aes(label = ifelse(proportion > 0.05, round(proportion, 2), "")),
  position = position_dodge(width = 0.9), # line labels up with dodged bars
  vjust = -0.5,                           # text offset
  size = 3                                # label font size
)

y <- abstract_summary %>%
  ggplot(aes(x = pattern, y = proportion, fill = AU_CO)) +
  geom_col(position = "dodge") +
  proportion_labels +
  coord_flip() +
  # Title intentionally blank ("Relative Proportions of Notions by Country (Abstract)")
  labs(title = "", x = "Pattern", y = "Proportion") +
  theme_minimal()
y

# Write the chart to disk as a 10 x 8 inch PNG at 1200 dpi on a white background
ggsave(
  filename = "plots/pattern_occurence.png",
  plot = y,
  width = 10,
  height = 8,
  dpi = 1200,
  bg = "white"
)