This R script extracts official X/Twitter handles for members of a specified U.S. Congress from the archive at https://github.com/unitedstates/congress-legislators, looks for missing records by comparing the results to the official list of members available from the Congress.gov API (an API key is required), displays records that are missing entirely or are missing an X/Twitter handle to allow for a manual compilation of the missing information, then adds the manually gathered information and exports the data in .csv format. I used ChatGPT to help write parts of the code that I wasn’t sure how to produce on my own.
# An R script to extract current U.S. Congress members'
# official social media accounts and ID info
# from https://github.com/unitedstates/congress-legislators
# Install required packages
if (!require("jsonlite")) install.packages("jsonlite")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("purrr")) install.packages("purrr")
if (!require("tibble")) install.packages("tibble")
if (!require("httr")) install.packages("httr")
if (!require("stringi")) install.packages("stringi")
# Load required libraries
library(jsonlite)
library(tidyverse)
library(purrr)
library(tibble)
library(httr)
library(stringi)
# Null-coalescing helper: gives back `a` unless it is NULL, in which case
# the fallback `b` is returned (and only then evaluated)
`%||%` <- function(a, b) {
  if (is.null(a)) {
    return(b)
  }
  a
}
# Define URLs
social_url <- "https://unitedstates.github.io/congress-legislators/legislators-social-media.json"
current_url <- "https://unitedstates.github.io/congress-legislators/legislators-current.json"
# Load JSON data
# simplifyVector = FALSE keeps each record as a nested list, so a field that
# is absent from a record is simply NULL when it is looked up below
social_raw <- fromJSON(social_url, simplifyVector = FALSE)
current_raw <- fromJSON(current_url, simplifyVector = FALSE)
# Extract social media data
# Builds one row per record: bioguide ID plus Twitter, Facebook and YouTube
# account names.
# Fields are read with [[ ]] instead of $ because $ on a list does partial
# name matching: when the exact name is absent but exactly one other name
# starts with it, $ silently returns that other element.
# NOTE(review): the upstream file is understood to also carry "*_id" fields
# (e.g. youtube_id) next to the account names -- verify against the data.
# Absent fields become NA_character_ so every column is always character.
social_df <- map_df(social_raw, function(x) {
  ids <- x[["id"]]
  accounts <- x[["social"]]
  tibble(
    bioguide = ids[["bioguide"]] %||% NA_character_,
    twitter = accounts[["twitter"]] %||% NA_character_,
    facebook = accounts[["facebook"]] %||% NA_character_,
    youtube = accounts[["youtube"]] %||% NA_character_
  )
})
# Extract legislator info including term details
# Builds one row per legislator; the term-level columns are taken from the
# last entry of the record's `terms` list.
# Fields are read with [[ ]] instead of $ because $ on a list does partial
# name matching and could silently return a differently named element when
# the exact name is absent. Absent fields become typed NA values so each
# column has a stable type across records.
legislator_df <- map_df(current_raw, function(x) {
  ids <- x[["id"]]
  name <- x[["name"]]
  bio <- x[["bio"]]
  terms <- x[["terms"]]
  # A record without any terms yields NA term fields instead of an error
  last_term <- if (length(terms) > 0) terms[[length(terms)]] else NULL
  tibble(
    bioguide = ids[["bioguide"]] %||% NA_character_,
    first_name = name[["first"]] %||% NA_character_,
    last_name = name[["last"]] %||% NA_character_,
    official_full = name[["official_full"]] %||% NA_character_,
    gender = bio[["gender"]] %||% NA_character_,
    party = last_term[["party"]] %||% NA_character_,
    state = last_term[["state"]] %||% NA_character_,
    # NOTE(review): district is assumed to be a whole number when present
    district = last_term[["district"]] %||% NA_integer_,
    # 'rep' for House, 'sen' for Senate
    chamber = last_term[["type"]] %||% NA_character_,
    term_start = last_term[["start"]] %||% NA_character_,
    term_end = last_term[["end"]] %||% NA_character_
  )
})
# Convert chamber codes to readable form
# (values other than "rep"/"sen" are left as they are by recode())
legislator_df <- legislator_df %>%
  mutate(chamber = recode(chamber, rep = "House", sen = "Senate"))
# Attach legislator details to each social media record. Rows follow
# social_df; a record with no match in legislator_df gets NA details.
# A "Last, First" name column is then added and the columns are put into
# their final order, with the account columns last.
combined_df <- social_df %>%
  left_join(legislator_df, by = "bioguide") %>%
  mutate(last_first = paste(last_name, first_name, sep = ", ")) %>%
  select(all_of(c(
    "last_first", "bioguide", "first_name", "last_name", "official_full",
    "gender", "party", "state", "district", "chamber",
    "term_start", "term_end",
    "twitter", "facebook", "youtube"
  )))
# Show the first ten rows as a quick check
print(head(combined_df, 10))
# Remove the intermediate objects and the helper operator
rm(list = c(
  "current_raw", "legislator_df", "social_df", "social_raw",
  "current_url", "social_url", "%||%"
))
# Check for missing member information
# Set your Congress.gov API key
# The key is read from the CONGRESS_API_KEY environment variable when that
# is set, so the real key does not have to be saved in this script.
# Otherwise the placeholder below is used and must be replaced by hand.
api_key <- Sys.getenv("CONGRESS_API_KEY",
                      unset = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
congress_number <- 119
base_url <- paste0("https://api.congress.gov/v3/member/congress/",
                   congress_number)
# Pagination setup
offset <- 0
page_size <- 250
members_all <- list()
# Request one page per iteration; each page's `members` table is stored as
# one element of members_all. The object names used here (url, res, data,
# ...) are removed by name later in the script, so they are kept as they are.
repeat {
  url <- paste0(base_url, "?offset=", offset, "&limit=", page_size)
  res <- GET(url, add_headers("X-API-Key" = api_key))
  # Abort on any HTTP error status
  stop_for_status(res)
  data <- fromJSON(content(res, "text", encoding = "UTF-8"), flatten = TRUE)
  # Stop when the response carries no members; checking for an empty page as
  # well keeps the loop from running on if a `next` link were still present
  if (!"members" %in% names(data) || length(data[["members"]]) == 0) {
    break
  }
  members_all <- append(members_all, list(data[["members"]]))
  # Exit if no further pages
  if (is.null(data[["pagination"]][["next"]])) {
    break
  }
  offset <- offset + page_size
}
# Stack the downloaded pages into one table, keeping the ID (renamed to
# `bioguide`), display name, party and state columns
members_df <- members_all %>%
  bind_rows() %>%
  select(bioguide = bioguideId, name, partyName, state)
# Members returned by the API that have no row in combined_df, with their
# columns renamed to combined_df's names
missing_in_combined <- members_df %>%
  anti_join(combined_df, by = "bioguide") %>%
  select(bioguide, last_first = name, party = partyName, state)
# Append those members to the combined table and sort by name
Congress_handles <- arrange(
  bind_rows(combined_df, missing_in_combined),
  last_first
)
# Rows that have no Twitter handle
NoTwit <- filter(Congress_handles, is.na(twitter))
# Print the names of those rows so they can be copied into ChatGPT to
# search for Twitter handles; each handle found that way is then
# verified individually
writeLines(NoTwit$last_first)
# Fill in the manually collected Twitter handles, then drop selected members.
# Names are first normalized to Unicode NFC so that accented names compare
# equal to the literals below. Conditions are tried top to bottom and the
# first match wins; rows matching none keep their current handle.
# Members who have died are kept. Dropped are members who moved from
# Congress into appointed positions within the executive administration,
# and Rep. Chris Smith, of New Jersey, who is not on X/Twitter.
Congress_handles <- Congress_handles %>%
  mutate(
    last_first = stri_trans_nfc(last_first),
    twitter = case_when(
      last_first == "Alsobrooks, Angela" ~ "Sen_Alsobrooks",
      last_first == "Cherfilus-McCormick, Sheila" ~ "CongresswomanSC",
      last_first == "Cisneros, Gilbert" ~ "RepGilCisneros",
      last_first == "Connolly, Gerald E." ~ "GerryConnolly",
      last_first == "Fetterman, John" ~ "SenFettermanPA",
      last_first == "Fine, Randy" ~ "RepFine",
      last_first == "Gooden, Lance" ~ "Lancegooden",
      last_first == "Graham, Lindsey" ~ "LindseyGrahamSC",
      last_first == "Grijalva, Raúl M." ~ "RepRaulGrijalva",
      # Same member matched by ID, in case the name comparison fails
      bioguide == "G000551" ~ "RepRaulGrijalva",
      last_first == "Hamadeh, Abraham J." ~ "RepAbeHamadeh",
      last_first == "Husted, Jon" ~ "SenJonHusted",
      last_first == "Jackson, Jonathan" ~ "rep_jackson",
      last_first == "Justice, Jim" ~ "JimJustice_WV",
      last_first == "Kaine, Timothy" ~ "timkaine",
      last_first == "Kim, Andy" ~ "AndyKimNJ",
      last_first == "McBride, Sarah" ~ "Rep_McBride",
      last_first == "McCormick, Dave" ~ "SenMcCormickPA",
      last_first == "Messmer, Mark" ~ "RepMessmer",
      last_first == "Min, Dave" ~ "CongressMin",
      last_first == "Moody, Ashley" ~ "SenAshleyMoody",
      last_first == "Patronis, Jimmy" ~ "JimmyPatronis",
      last_first == "Randall, Emily" ~ "repemilyrandall",
      last_first == "Riley, Josh" ~ "RepRileyNY",
      last_first == "Rubio, Marco" ~ "marcorubio",
      last_first == "Rulli, Michael A." ~ "RepMichaelRulli",
      last_first == "Schiff, Adam" ~ "SenAdamSchiff",
      last_first == "Sheehy, Tim" ~ "TimSheehyMT",
      last_first == "Smith, Christopher" ~ "",
      last_first == "Tran, Derek" ~ "RepDerekTranCA",
      last_first == "Turner, Sylvester" ~ "SylvesterTurner",
      last_first == "Vance, J. D." ~ "JDVance",
      last_first == "Waltz, Michael" ~ "michaelgwaltz",
      TRUE ~ twitter
    )
  ) %>%
  filter(!(last_first %in% c(
    "Smith, Christopher",
    "Rubio, Marco",
    "Vance, J. D.",
    "Waltz, Michael"
  )))
# Abbreviate long state names from Congress.gov API
# Rows added from the API carry full state names, while the other rows
# already use two-letter codes. Instead of listing only the states that
# happened to need conversion, the lookup covers all 50 states (R's built-in
# state.name / state.abb vectors) plus DC and the territories, so a re-run
# with different missing members needs no edits here.
# NOTE(review): the territory names are assumed to match the API's
# spelling -- confirm against an API response.
state_lookup <- c(
  setNames(state.abb, state.name),
  "District of Columbia" = "DC",
  "Puerto Rico" = "PR",
  "Guam" = "GU",
  "American Samoa" = "AS",
  "Virgin Islands" = "VI",
  "Northern Mariana Islands" = "MP"
)
# Values that are not a known full name (codes, NA) are left unchanged
Congress_handles <- Congress_handles %>%
  mutate(state = case_when(
    state %in% names(state_lookup) ~ unname(state_lookup[state]),
    TRUE ~ state
  ))
# The lookup is only needed for this step
rm(state_lookup)
# Supply missing gender, chamber and district data for select cases
# Rows without a chamber value are exported so they can be completed by hand
MissingDetails <- Congress_handles %>%
  filter(is.na(chamber))
write_csv(MissingDetails, "MissingDetails.csv")
# Manually added missing data using Excel,
# using 999 for missing values, saved the .csv,
# then pasted the .csv data into the code below
# Store the CSV-formatted text in a string
csv_text <- '
last_first,bioguide,first_name,last_name,official_full,gender,party,state,district,chamber,term_start,term_end,twitter,facebook,youtube
"Connolly, Gerald E.",C001078,Gerald,Connolly,"Connolly, Gerald E.",M,Democratic,VA,11,House,NA,NA,GerryConnolly,NA,NA
"Fine, Randy",F000484,Randy,Fine,"Fine, Randy",M,Republican,FL,6,House,NA,NA,RepFine,NA,NA
"Grijalva, Raúl M.",G000551,Raul,Grijalva,"Grijalva, Raúl M.",M,Democratic,AZ,7,House,NA,NA,RepRaulGrijalva,NA,NA
"Hamadeh, Abraham J.",H001098,Abraham,Hamadeh,"Hamadeh, Abraham J.",M,Republican,AZ,8,House,NA,NA,RepAbeHamadeh,NA,NA
"Husted, Jon",H001104,Jon,Husted,"Husted, Jon",M,Republican,OH,999,Senate,NA,NA,SenJonHusted,NA,NA
"Min, Dave",M001241,Dave,Min,"Min, Dave",M,Democratic,CA,47,House,NA,NA,CongressMin,NA,NA
"Moody, Ashley",M001244,Ashley,Moody,"Moody, Ashley",F,Republican,FL,999,Senate,NA,NA,SenAshleyMoody,NA,NA
"Patronis, Jimmy",P000622,Jimmy,Patronis,"Patronis, Jimmy",M,Republican,FL,1,House,NA,NA,JimmyPatronis,NA,NA
"Randall, Emily",R000621,Emily,Randall,"Randall, Emily",F,Democratic,WA,6,House,NA,NA,repemilyrandall,NA,NA
"Rulli, Michael A.",R000619,Michael,Rulli,"Rulli, Michael A.",M,Republican,OH,6,House,NA,NA,RepMichaelRulli,NA,NA
"Sheehy, Tim",S001232,Tim,Sheehy,"Sheehy, Tim",M,Republican,MT,999,Senate,NA,NA,TimSheehyMT,NA,NA
"Tran, Derek",T000491,Derek,Tran,"Tran, Derek",M,Democratic,CA,45,House,NA,NA,RepDerekTranCA,NA,NA
"Turner, Sylvester",T000489,Sylvester,Turner,"Turner, Sylvester",M,Democratic,TX,18,House,NA,NA,SylvesterTurner,NA,NA
'
# Read the CSV text into a data frame
# Passing the string through `text =` lets read.csv() open and close its own
# connection; an explicit textConnection() would be left open because
# nothing here closes it.
DetailsToAdd <- read.csv(text = csv_text, stringsAsFactors = FALSE)
# Overwrite the matching rows (by bioguide ID) with the completed details.
# Only fully completed rows are applied, and the 999 placeholder used for
# "no district" is turned back into NA afterwards.
Congress_handles <- Congress_handles %>%
  rows_update(
    DetailsToAdd %>%
      filter(!is.na(gender) & !is.na(district) & !is.na(chamber)),
    by = "bioguide"
  ) %>%
  mutate(district = ifelse(district == 999, NA, district))
# Remove the intermediate objects created above; Congress_handles is not
# in the list and stays available for the export below
rm(list = c(
  "combined_df", "data", "members_all", "members_df",
  "missing_in_combined", "NoTwit", "res", "api_key", "base_url",
  "congress_number", "offset", "page_size", "url",
  "DetailsToAdd", "MissingDetails", "csv_text"
))
# Export the compiled table
write_csv(Congress_handles, "Congress_handles.csv")
# Follow-up check, run after posts for January through May had been
# collected: lists the handles in Congress_handles.csv for which the
# collected data contain no posts.
library(tidyverse)
# Member table with a lower-cased copy of the handle for matching
MemberDetails <- read_csv("Congress_handles.csv") %>%
  mutate(author_lower = str_to_lower(twitter)) %>%
  rename(Author = twitter)
DataToCheck <- readRDS("Latest119thData.RDS")
# Number of rows in the collected data per lower-cased author
Counts <- count(DataToCheck, author_lower, name = "Count")
# Members whose handle does not appear among the counted authors
MissingFromCounts <- anti_join(MemberDetails, Counts, by = "author_lower")
# Print them to the console as pipe-separated text
MissingFromCounts %>%
  select(last_first, author_lower, chamber) %>%
  write.table(sep = "|", quote = FALSE, row.names = FALSE)
# Results of manual check:
# last_first author_lower chamber Update Notes
# Bonamici, Suzanne repbonamici House None On Bluesky, not X. At repbonamici.bsky.social
# Budd, Ted reptedbudd Senate SenTedBuddNC
# Clark, Katherine repkclark House WhipKClark
# Edwards, Charles (Chuck) repedwards House RepChuckEdwards
# Emmer, Tom reptomemmer House GOPMajorityWhip
# Hunt, Wesley repwesleyhunt House RepWPH
# Johnson, Mike repmikejohnson House SpeakerJohnson
# Malliotakis, Nicole repmalliotakis House RepNicole
# Marshall, Roger senatormarshall Senate RogerMarshallMD
# Moylan, James (Jim) jmoylanforguam House RepMoylan X account on House page doesn't exist. Uses @RepMoylan instead.
# Murphy, Christopher senmurphyoffice Senate ChrisMurphyCT No X account listed on Senate page. This is the one he uses, though.
# Pou, Nellie caohouse House RepNellie
# Randall, Emily repemilyrandall House EmilyRandallWA Mainly on Bluesky, at repemilyrandall.bsky.social
# Welch, Peter peterwelch Senate SenPeterWelch
# Created a one-time Brandwatch query that searched the updated handles
# for posts since January 1, 2025
# Downloaded those posts as 20250622HandlesUpdate.csv
# Update X handles in Congress_handles.csv
# Handles found to be outdated in the manual check are replaced. Matching is
# done on a lower-cased copy so that capitalization in the file does not
# matter. Handles that are not in the list are kept exactly as stored: the
# fallback returns `twitter`, not the lower-cased helper column, which would
# lower-case every untouched handle while the replacements keep mixed case.
Congress_handles <- read_csv("Congress_handles.csv") %>%
  mutate(twitter_lower = str_to_lower(twitter)) %>%
  mutate(twitter = case_when(
    twitter_lower == "reptedbudd" ~ "SenTedBuddNC",
    twitter_lower == "repkclark" ~ "WhipKClark",
    twitter_lower == "repedwards" ~ "RepChuckEdwards",
    twitter_lower == "reptomemmer" ~ "GOPMajorityWhip",
    twitter_lower == "repwesleyhunt" ~ "RepWPH",
    twitter_lower == "repmikejohnson" ~ "SpeakerJohnson",
    twitter_lower == "repmalliotakis" ~ "RepNicole",
    twitter_lower == "senatormarshall" ~ "RogerMarshallMD",
    twitter_lower == "jmoylanforguam" ~ "RepMoylan",
    twitter_lower == "senmurphyoffice" ~ "ChrisMurphyCT",
    twitter_lower == "caohouse" ~ "RepNellie",
    twitter_lower == "repemilyrandall" ~ "EmilyRandallWA",
    twitter_lower == "peterwelch" ~ "SenPeterWelch",
    TRUE ~ twitter
  )) %>%
  # The helper column is only needed for the comparison above
  select(-c("twitter_lower"))
# Use a single label for the party: every "Democratic" value becomes
# "Democrat"; all other values, including missing ones, are left untouched
Congress_handles <- Congress_handles %>%
  mutate(party = replace(party, party %in% "Democratic", "Democrat"))
# Write the final table back out in .csv format
write_csv(Congress_handles, "Congress_handles.csv")