These charts show the most frequent three-word phrases (trigrams) in Congressional posts since January 2025, split by party and plotted as weekly volume.
Code:
# ==========================================================
# Step 1: Install and load required packages
# ==========================================================
# requireNamespace() only checks whether a package is installed; unlike
# require(), it does not attach it. Attaching is done by the library() calls
# below, which stop with an error if a package is still unavailable.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
if (!requireNamespace("tidytext", quietly = TRUE)) {
  install.packages("tidytext")
}
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra")
}
library(tidyverse)
library(plotly)
library(tidytext)
library(kableExtra)
# ==========================================================
# Step 2: USER SETTING — choose ngram size
# ==========================================================
# Number of consecutive words per phrase:
# 1 = unigrams, 2 = bigrams, 3 = trigrams, etc.
n_size <- 3L
# Fail fast on an unusable setting: later steps build one column per word
# from this value and pass it to the tokenizer as the n-gram length.
stopifnot(
  is.numeric(n_size),
  length(n_size) == 1L,
  !is.na(n_size),
  n_size >= 1,
  n_size == round(n_size)
)
# ============================================
# Step 3: LOAD AND FORMAT DATA
# ============================================
# Read the saved dataset, coerce Date to class Date, and attach the start of
# each row's week. week_start = 1 makes weeks begin on Monday.
Data <- readRDS("Latest119thData.rds") %>%
  mutate(
    Date = as.Date(Date),
    WeekStart = lubridate::floor_date(Date, unit = "week", week_start = 1)
  )
# ============================================
# --- Add "Week" variable ---
# ============================================
# Week is a 1-based index counted from the earliest WeekStart in the data,
# so rows in that earliest week get Week == 1.
first_week <- min(Data$WeekStart, na.rm = TRUE)
Data <- mutate(
  Data,
  Week = as.integer(difftime(WeekStart, first_week, units = "weeks")) + 1
)
# ============================================
# --- Combine Democrats and independents ---
# ============================================
# Collapse the party labels into two groups. Any value that is neither
# Democrat/Independent nor Republican (including NA) is labelled "error".
Data <- Data %>%
  mutate(
    party = if_else(
      party %in% c("Democrat", "Independent"),
      "Dem/Ind",
      if_else(party == "Republican", "Rep", "error", missing = "error")
    )
  )
# Working copy that the text-processing steps below operate on.
XData <- Data
# ==========================================================
# Step 4: Clean text
# ==========================================================
# Lower-case the text, then delete curly apostrophes and backticks.
# NOTE(review): the straight apostrophe (') is not in the character class, so
# it stays in the text. The same contraction typed with different apostrophe
# characters may therefore end up as different tokens — confirm this is
# intended.
XData <- mutate(
  XData,
  Full.Text = str_remove_all(str_to_lower(Full.Text), "[’‘`]")
)
# ==========================================================
# Step 5: Create ngrams
# ==========================================================
# Tokenize Full.Text into n-grams of length n_size; each n-gram is written to
# the `ngram` column.
X_ngrams <- unnest_tokens(
  XData,
  output = ngram,
  input = Full.Text,
  token = "ngrams",
  n = n_size
)
# ==========================================================
# Step 6: Separate ngrams into component words
# ==========================================================
# Make the stop_words lexicon available for the stop-word filter that follows.
data("stop_words")
# One column name per word position: w1, w2, ..., w<n_size>.
# seq_len() is used instead of 1:n_size so a zero-length setting cannot
# silently expand to c(1, 0).
ngram_cols <- paste0("w", seq_len(n_size))
# Split each n-gram on single spaces into its word columns. remove = FALSE
# keeps the original `ngram` column alongside them.
ngrams_separated <- X_ngrams %>%
  separate(
    ngram,
    into = ngram_cols,
    sep = " ",
    remove = FALSE
  )
# ==========================================================
# Step 7: Remove stopwords
# ==========================================================
# Drop every n-gram in which at least one component word is a stop word;
# equivalently, keep only n-grams whose words are all non-stop-words.
ngrams_clean <- filter(
  ngrams_separated,
  !if_any(all_of(ngram_cols), ~ .x %in% stop_words$word)
)
# ==========================================================
# Step 8: Remove noise
# ==========================================================
# Tokens treated as noise rather than content.
x_noise <- c(
  "https", "tco", "t.co", "rt", "amp",
  "co", "com", "www",
  "twitter", "x",
  "im", "its"
)
# Keep an n-gram only if every component word contains at least one
# lower-case letter and is none of: a token starting with "@", a token
# starting with "#", or a listed noise token.
ngrams_clean <- filter(
  ngrams_clean,
  if_all(
    all_of(ngram_cols),
    ~ str_detect(.x, "[a-z]") &
      !(str_detect(.x, "^@") | str_detect(.x, "^#") | .x %in% x_noise)
  )
)
# ==========================================================
# Step 9: Recombine ngrams
# ==========================================================
# Paste the per-word columns back together, separated by single spaces, into
# the `ngram` column.
X_ngrams_clean <- unite(
  ngrams_clean,
  col = "ngram",
  all_of(ngram_cols),
  sep = " "
)
# ==========================================================
# Step 10: Count ngrams by party
# ==========================================================
# Occurrences of each n-gram within each party, most frequent first.
party_ngram_counts <- count(X_ngrams_clean, party, ngram, sort = TRUE)
# Output names used when the pipeline is run with n_size = 2:
# saveRDS(party_ngram_counts, file = "All_Bigram_Counts_By_Party.rds")
# saveRDS(X_ngrams_clean, file = "All_Bigrams_clean.rds")
# NOTE(review): "Triigram" looks like a typo for "Trigram". The name is left
# as-is because other scripts may read this exact file — confirm before
# renaming. The file names are also fixed to "Trigram" regardless of n_size.
saveRDS(object = party_ngram_counts, file = "All_Triigram_Counts_By_Party.rds")
saveRDS(object = X_ngrams_clean, file = "All_Trigrams_clean.rds")
############################################################
# STEP 0a: Install required package (run once if needed)
############################################################
if (!requireNamespace("ggridges", quietly = TRUE)) {
install.packages("ggridges")
}
############################################################
# STEP 0b: Load required packages
############################################################
library(dplyr)
library(ggplot2)
library(ggridges)
############################################################
# STEP 1: Identify the most frequent ngrams by party
############################################################
All_Trigrams_clean <- readRDS("All_Trigrams_clean.rds")
summary(All_Trigrams_clean)
# All_Trigrams_clean <- All_Trigrams_clean %>%
# filter(Week > 60)
# Number of n-grams kept per party. The object below is still called
# `top_20_by_party` because later steps refer to it by that name, but the
# cutoff actually applied is this value (10), not 20.
top_n_per_party <- 10
top_20_by_party <- All_Trigrams_clean %>%
  filter(!is.na(ngram), nchar(ngram) > 0) %>%
  # sort = TRUE orders rows by descending count, so the first rows within
  # each party are that party's most frequent n-grams.
  count(party, ngram, sort = TRUE) %>%
  group_by(party) %>%
  slice_head(n = top_n_per_party) %>%
  ungroup()
############################################################
# STEP 2: Republican data — weekly counts for the top ngrams
############################################################
# Count, per week, how often each of the Republican top n-grams occurs.
# Rows without a WeekStart cannot be placed on the time axis and are dropped.
rep_weekly <- All_Trigrams_clean %>%
  filter(
    party == "Rep",
    ngram %in% top_20_by_party$ngram[top_20_by_party$party == "Rep"],
    !is.na(WeekStart)
  ) %>%
  count(WeekStart, ngram, name = "weekly_count")
# count() only returns week/n-gram pairs that occur at least once. Without
# explicit zero rows the plot would draw a straight line across weeks in
# which a phrase was never used, overstating its volume. Fill every missing
# week/n-gram pair with a count of 0 (skipped when there is nothing to fill).
if (nrow(rep_weekly) > 0) {
  rep_weekly <- rep_weekly %>%
    tidyr::complete(
      WeekStart = seq(min(WeekStart), max(WeekStart), by = "week"),
      ngram,
      fill = list(weekly_count = 0L)
    )
}
############################################################
# STEP 3: Ridgeline-style faceted plot — Republicans
# Purpose: Each ngram gets its own panel with a clear label
############################################################
# Number of n-grams actually plotted, so the title always matches the data.
rep_top_count <- length(unique(rep_weekly$ngram))
# geom_ridgeline() fills the area between y and y + scale * height. The
# baseline is therefore pinned at y = 0 and the weekly count is carried by
# `height` alone; mapping the count to both would draw a band running from
# the count up to twice the count.
ggplot(
  rep_weekly,
  aes(x = WeekStart, y = 0, height = weekly_count)
) +
  geom_ridgeline(
    scale = 1,
    alpha = 0.7,
    fill = "gray70",
    color = "white"
  ) +
  # One panel per n-gram, stacked vertically, each with its own y range and
  # its label on the right-hand side.
  facet_wrap(
    ~ ngram,
    ncol = 1,
    scales = "free_y",
    strip.position = "right"
  ) +
  labs(
    title = paste0(
      "Weekly Volume of Top ", rep_top_count, " Trigrams: Republicans"
    ),
    x = "Week",
    y = "Weekly count"
  ) +
  theme_ridges() +
  theme(
    strip.text.y.right = element_text(size = 9, angle = 0),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.spacing = unit(0.15, "lines")
  )
############################################################
# STEP 4: Dem/Ind data — weekly counts for the top ngrams
############################################################
# Count, per week, how often each of the Dem/Ind top n-grams occurs.
# Rows without a WeekStart cannot be placed on the time axis and are dropped.
dem_weekly <- All_Trigrams_clean %>%
  filter(
    party == "Dem/Ind",
    ngram %in% top_20_by_party$ngram[top_20_by_party$party == "Dem/Ind"],
    !is.na(WeekStart)
  ) %>%
  count(WeekStart, ngram, name = "weekly_count")
# count() only returns week/n-gram pairs that occur at least once. Without
# explicit zero rows the plot would draw a straight line across weeks in
# which a phrase was never used, overstating its volume. Fill every missing
# week/n-gram pair with a count of 0 (skipped when there is nothing to fill).
if (nrow(dem_weekly) > 0) {
  dem_weekly <- dem_weekly %>%
    tidyr::complete(
      WeekStart = seq(min(WeekStart), max(WeekStart), by = "week"),
      ngram,
      fill = list(weekly_count = 0L)
    )
}
############################################################
# STEP 5: Ridgeline-style faceted plot — Democrats & Independents
############################################################
# Number of n-grams actually plotted, so the title always matches the data.
dem_top_count <- length(unique(dem_weekly$ngram))
# geom_ridgeline() fills the area between y and y + scale * height. The
# baseline is therefore pinned at y = 0 and the weekly count is carried by
# `height` alone; mapping the count to both would draw a band running from
# the count up to twice the count.
ggplot(
  dem_weekly,
  aes(x = WeekStart, y = 0, height = weekly_count)
) +
  geom_ridgeline(
    scale = 1,
    alpha = 0.7,
    fill = "gray70",
    color = "white"
  ) +
  # One panel per n-gram, stacked vertically, each with its own y range and
  # its label on the right-hand side.
  facet_wrap(
    ~ ngram,
    ncol = 1,
    scales = "free_y",
    strip.position = "right"
  ) +
  labs(
    title = paste0(
      "Weekly Volume of Top ", dem_top_count,
      " Trigrams: Democrats & Independents"
    ),
    x = "Week",
    y = "Weekly count"
  ) +
  theme_ridges() +
  theme(
    strip.text.y.right = element_text(size = 9, angle = 0),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.spacing = unit(0.15, "lines")
  )