Top 40 phrases Congress posted on X

Here’s what Democrats and Republicans in the U.S. Congress used their official X.com accounts to post about most often this past month:
The data are collected and summarized monthly by faculty experts in MTSU’s School of Journalism and Strategic Media, who work with Brandwatch data in the school’s Social Media Insights Lab.
R Code:
# Libraries

library(tidyverse)
library(tidytext)
library(stringr)
library(scales)
library(forcats)

# Get data and filter by time frame

# Load the saved data set and keep only rows whose Date falls in May 2025
# (America/Chicago time). The window is half-open -- from the start of May up
# to, but not including, the start of June -- so a post time-stamped inside the
# final second of the month (e.g. 23:59:59.5) is kept rather than dropped.
DataToAnalyze <- readRDS("Latest119thData.RDS") %>%
  filter(
    Date >= as.POSIXct("2025-05-01 00:00:00", tz = "America/Chicago"),
    Date < as.POSIXct("2025-06-01 00:00:00", tz = "America/Chicago")
  )

# Cleaning data

# Text to delete before tokenizing: t.co short links, the leading part of
# http:// links, the HTML entities &amp; &lt; &gt;, the retweet marker "RT",
# and any leftover "https". "RT" is wrapped in word boundaries so it is removed
# only as a stand-alone token, not from inside upper-case words such as
# "SUPPORT" or "NORTH".
replace_reg <- "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|\\bRT\\b|https"
# Token separator: any character that is not a letter, digit, "_", "#", "@" or
# "'", plus an apostrophe that is not followed by one of those characters.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# Word frequency

# One row per word. Posts whose text starts with "RT" are dropped, the
# replace_reg matches are stripped from the remaining text, and the text is
# split into tokens on unnest_reg. Tokens without a lower-case letter are
# discarded, as are stop words (matched with and without their apostrophes).
tidy_tweets <- DataToAnalyze %>%
  filter(str_detect(Full.Text, "^RT", negate = TRUE)) %>%
  mutate(text = str_replace_all(Full.Text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg) %>%
  filter(str_detect(word, "[a-z]")) %>%
  filter(
    !word %in% union(
      stop_words$word,
      str_remove_all(stop_words$word, "'")
    )
  )

# Count of each word per party, plus that count as a share of all of the
# party's words (freq). The join key is stated explicitly so the join cannot
# silently pick up additional shared columns and prints no "Joining with"
# message.
WordFrequency <- tidy_tweets %>%
  count(party, word, sort = TRUE) %>%
  left_join(
    tidy_tweets %>% count(party, name = "total"),
    by = "party"
  ) %>%
  mutate(freq = n / total)


# Bigram frequency

# Text to delete before tokenizing: t.co short links, the leading part of
# http:// links, the HTML entities &amp; &lt; &gt;, the retweet marker "RT",
# and any leftover "https". "RT" is wrapped in word boundaries so it is removed
# only as a stand-alone token, not from inside upper-case words such as
# "SUPPORT" or "NORTH".
replace_reg <- "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|\\bRT\\b|https"

# One row per two-word phrase. Posts whose text starts with "RT" are dropped,
# the cleaned text is split into bigrams, and a bigram is kept only when
# neither of its words is a stop word and both contain a lower-case letter.
tidy_bigrams <- DataToAnalyze %>%
  filter(!str_detect(Full.Text, "^RT")) %>%
  mutate(text = str_replace_all(Full.Text, replace_reg, "")) %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  # Split into the two words so each can be checked on its own.
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    str_detect(word1, "[a-z]"),
    str_detect(word2, "[a-z]")
  ) %>%
  # Re-assemble the surviving pairs into a single "word1 word2" column.
  unite(bigram, word1, word2, sep = " ")

# Count of each bigram per party, plus that count as a share of all of the
# party's bigrams (freq). The denominator is taken from tidy_bigrams, not from
# the single-word table, so that freq is a proportion of bigrams. The join key
# is stated explicitly to avoid an implicit natural join.
BigramFrequency <- tidy_bigrams %>%
  count(party, bigram, sort = TRUE) %>%
  left_join(
    tidy_bigrams %>% count(party, name = "total"),
    by = "party"
  ) %>%
  mutate(freq = n / total)

# Prepare top 40 bigrams per party with sorted factors

# Keep the two major parties, take each party's 40 most frequent bigrams
# (ties cut off at 40), and order the factor levels by count. bigram_faceted
# combines party and bigram ("<party>.<bigram>") so each facet of the plot can
# be sorted independently.
top_bigrams <- BigramFrequency %>%
  filter(party == "Democrat" | party == "Republican") %>%
  group_by(party) %>%
  slice_max(n, n = 40, with_ties = FALSE) %>%
  mutate(bigram = fct_reorder(bigram, n, .fun = median)) %>%
  ungroup() %>%
  mutate(
    bigram_faceted = fct_reorder(
      interaction(party, bigram, sep = "."),
      n,
      .fun = median
    )
  )

# Plot with custom colors

# Horizontal bar chart of bigram counts, one facet per party.
# bigram_faceted levels have the form "<party>.<bigram>". The label function
# strips only the leading party prefix (everything up to the first "."), so a
# bigram that itself contains a period, such as "u.s senate", is shown in full
# instead of being cut at its last period.
InitialPlot <- ggplot(
  top_bigrams,
  aes(x = bigram_faceted, y = n, fill = party)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~ party, scales = "free") +
  scale_fill_manual(values = c(
    "Republican" = "#C33C54",
    "Democrat" = "#254E70"
  )) +
  scale_x_discrete(labels = function(x) sub("^[^.]*\\.", "", x)) +
  labs(
    title = "Top 40 phrases Congress posted on X in May, by Party",
    x = NULL,
    y = "Mentions",
    caption = "Bars = number of mentions. Source: Social Media Insights Lab, MTSU School of Journalism & Strategic Media"
  ) +
  theme_minimal()

InitialPlot

# MANUAL ADJUSTMENTS

# Categorize selected bigrams

# Collapse related bigrams into one upper-case category label. The rules are
# tried from top to bottom and the first match wins; a bigram that matches no
# rule keeps its original text.
tidy_bigrams <- mutate(
  tidy_bigrams,
  bigram = case_when(
    grepl("trump", bigram, ignore.case = TRUE) ~ "TRUMP",
    grepl("immig|border|illegal alien", bigram, ignore.case = TRUE) ~
      "IMMIGRATION",
    grepl("republican", bigram, ignore.case = TRUE) ~ "REPUBLICAN(S)",
    grepl("democrat", bigram, ignore.case = TRUE) ~ "DEMOCRAT(S)",
    grepl("pope leo|leo xiv", bigram, ignore.case = TRUE) ~ "POPE LEO",
    TRUE ~ bigram
  )
)

# Recount per party now that some bigrams have been merged into categories.
# freq is the count as a share of all of the party's bigrams: the denominator
# is taken from tidy_bigrams, not from the single-word table. The join key is
# stated explicitly to avoid an implicit natural join.
BigramFrequency <- tidy_bigrams %>%
  count(party, bigram, sort = TRUE) %>%
  left_join(
    tidy_bigrams %>% count(party, name = "total"),
    by = "party"
  ) %>%
  mutate(freq = n / total)

# Top 40 bigrams per party, omitting selected bigram categories

# The omitted categories are removed BEFORE slicing, so each party still gets
# 40 rows (when it has that many). Filtering after slice_max() would leave
# fewer than 40 bars whenever an omitted category ranked in the top 40.
top_bigrams <- BigramFrequency %>%
  filter(
    party %in% c("Democrat", "Republican"),
    !bigram %in% c("TRUMP", "REPUBLICAN(S)", "DEMOCRAT(S)")
  ) %>%
  group_by(party) %>%
  slice_max(order_by = n, n = 40, with_ties = FALSE) %>%
  mutate(bigram = fct_reorder(bigram, n)) %>%
  ungroup() %>%
  # Party-qualified factor so each facet can be sorted independently.
  mutate(bigram_faceted = fct_reorder(interaction(party, bigram), n))

# Replot adjusted data

# Horizontal bar chart of bigram counts, one facet per party.
# bigram_faceted levels have the form "<party>.<bigram>". The label function
# strips only the leading party prefix (everything up to the first "."), so a
# bigram that itself contains a period, such as "u.s senate", is shown in full
# instead of being cut at its last period.
AdjustedPlot <- ggplot(
  top_bigrams,
  aes(x = bigram_faceted, y = n, fill = party)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~ party, scales = "free") +
  scale_fill_manual(values = c(
    "Republican" = "#C33C54",
    "Democrat" = "#254E70"
  )) +
  scale_x_discrete(labels = function(x) sub("^[^.]*\\.", "", x)) +
  labs(
    title = "Top 40 phrases Congress posted on X in May, by Party",
    x = NULL,
    y = "Mentions",
    caption = "Bars = number of mentions. Source: Social Media Insights Lab, MTSU School of Journalism & Strategic Media"
  ) +
  theme_minimal()

AdjustedPlot

# Cut graph to top 10 bigrams per party

# The omitted categories are removed BEFORE slicing, so each party still gets
# 10 rows (when it has that many). Filtering after slice_max() would leave
# fewer than 10 bars whenever an omitted category ranked in the top 10.
top_bigrams <- BigramFrequency %>%
  filter(
    party %in% c("Democrat", "Republican"),
    !bigram %in% c("TRUMP", "REPUBLICAN(S)", "DEMOCRAT(S)")
  ) %>%
  group_by(party) %>%
  slice_max(order_by = n, n = 10, with_ties = FALSE) %>%
  mutate(bigram = fct_reorder(bigram, n)) %>%
  ungroup() %>%
  # Party-qualified factor so each facet can be sorted independently.
  mutate(bigram_faceted = fct_reorder(interaction(party, bigram), n))

# Plot with custom colors

# Horizontal bar chart of bigram counts, one facet per party, with larger text.
# bigram_faceted levels have the form "<party>.<bigram>". The label function
# strips only the leading party prefix (everything up to the first "."), so a
# bigram that itself contains a period, such as "u.s senate", is shown in full
# instead of being cut at its last period.
ShortPlot <- ggplot(
  top_bigrams,
  aes(x = bigram_faceted, y = n, fill = party)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~ party, scales = "free") +
  scale_fill_manual(values = c(
    "Republican" = "#C33C54",
    "Democrat" = "#254E70"
  )) +
  scale_x_discrete(labels = function(x) sub("^[^.]*\\.", "", x)) +
  labs(
    title = "Top 10 phrases members of Congress posted on X in May, by Party",
    x = NULL,
    y = NULL,
    caption = "Bars = number of mentions. Source: Social Media Insights Lab, MTSU School of Journalism & Strategic Media"
  ) +
  theme_minimal(base_size = 14) + # increase base font size
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 14),
    strip.text = element_text(size = 14), # facet labels
    plot.caption = element_text(size = 12)
  )

ShortPlot