Extracting Twitter Handles for members of Congress

This R script extracts official X/Twitter handles for members of a specified U.S. Congress from the archive at https://github.com/unitedstates/congress-legislators, looks for missing records by comparing the results to the official list of members available from the Congress.gov API (an API key is required), displays records that are missing entirely or are missing an X/Twitter handle to allow for a manual compilation of the missing information, then adds the manually gathered information and exports the data in .csv format. I used ChatGPT to help write parts of the code that I wasn’t sure how to produce on my own.
# An R script to extract current U.S. Congress members'
# official social media accounts and ID info
# from https://github.com/unitedstates/congress-legislators

# Install required packages
if (!require("jsonlite")) install.packages("jsonlite")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("purrr")) install.packages("purrr")
if (!require("tibble")) install.packages("tibble")
if (!require("httr")) install.packages("httr")
if (!require("stringi")) install.packages("stringi")

# Load required libraries
library(jsonlite)
library(tidyverse)
library(purrr)
library(tibble)
library(httr)
library(stringi)

# Null-coalescing operator: evaluates to `a` unless `a` is NULL,
# in which case it evaluates to `b`
`%||%` <- function(a, b) {
  if (is.null(a)) {
    return(b)
  }
  a
}

# Locations of the two JSON files read by this script
social_url <- "https://unitedstates.github.io/congress-legislators/legislators-social-media.json"
current_url <- "https://unitedstates.github.io/congress-legislators/legislators-current.json"

# Download both files as nested lists (no simplification to vectors)
social_raw <- fromJSON(social_url, simplifyVector = FALSE)
current_raw <- fromJSON(current_url, simplifyVector = FALSE)

# Build one row per social-media record; any absent field becomes NA
social_df <- social_raw %>%
  map(function(entry) {
    accounts <- entry$social
    tibble(
      bioguide = entry$id$bioguide %||% NA,
      twitter = accounts$twitter %||% NA,
      facebook = accounts$facebook %||% NA,
      youtube = accounts$youtube %||% NA
    )
  }) %>%
  bind_rows()

# Build one row per legislator, taking party, state, district, chamber
# and term dates from the last entry in that legislator's `terms` list,
# then spell out the chamber codes ("rep" -> "House", "sen" -> "Senate")
legislator_df <- current_raw %>%
  map(function(member) {
    terms <- member$terms
    latest <- terms[[length(terms)]]

    tibble(
      bioguide = member$id$bioguide %||% NA,
      first_name = member$name$first %||% NA,
      last_name = member$name$last %||% NA,
      official_full = member$name$official_full %||% NA,
      gender = member$bio$gender %||% NA,
      party = latest$party %||% NA,
      state = latest$state %||% NA,
      district = latest$district %||% NA,
      # Still the raw code here; recoded after the rows are bound
      chamber = latest$type %||% NA,
      term_start = latest$start %||% NA,
      term_end = latest$end %||% NA
    )
  }) %>%
  bind_rows() %>%
  mutate(chamber = recode(chamber, "rep" = "House", "sen" = "Senate"))

# Attach legislator details to every social-media record (all social-media
# rows are kept), add a "Last, First" name column, and put the columns
# in their final order
combined_df <- social_df %>%
  left_join(legislator_df, by = "bioguide") %>%
  mutate(last_first = paste(last_name, first_name, sep = ", ")) %>%
  select(all_of(c(
    "last_first",
    "bioguide",
    "first_name",
    "last_name",
    "official_full",
    "gender",
    "party",
    "state",
    "district",
    "chamber",
    "term_start",
    "term_end",
    "twitter",
    "facebook",
    "youtube"
  )))

# Show the first ten rows as a quick check
print(head(combined_df, n = 10))

# Drop the intermediate objects, including the %||% helper
rm(list = c(
  "current_raw",
  "legislator_df",
  "social_df",
  "social_raw",
  "current_url",
  "social_url",
  "%||%"
))

# Check for missing member information

# Congress.gov API key. It is read from the CONGRESS_API_KEY environment
# variable (for example, set in ~/.Renviron) so that a real key never has
# to be saved in this script. If the variable is not set, the placeholder
# below is used, and it can still be replaced by hand as before.
api_key <- Sys.getenv(
  "CONGRESS_API_KEY",
  unset = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
)

# Member-list endpoint for the Congress of interest
congress_number <- 119
base_url <- paste0("https://api.congress.gov/v3/member/congress/",
                   congress_number)

# Pagination setup: request `page_size` records at a time, starting at
# `offset`, and collect each page's member table in `members_all`
offset <- 0
page_size <- 250
members_all <- list()

repeat {
  url <- paste0(base_url, "?offset=", offset, "&limit=", page_size)

  # The key is sent as a request header; any HTTP error stops the script
  res <- GET(url, add_headers("X-API-Key" = api_key))
  stop_for_status(res)
  data <- fromJSON(content(res, "text", encoding = "UTF-8"), flatten = TRUE)

  # Stop if the response carries no member table at all
  if (!"members" %in% names(data)) {
    break
  }

  members_all <- append(members_all, list(data$members))

  # Exit if no further pages
  if (is.null(data$pagination[["next"]])) {
    break
  }

  offset <- offset + page_size
}

# Stack the downloaded pages and keep only the ID, name, party and state
# columns, renaming the ID column so it matches combined_df
members_df <- members_all %>%
  bind_rows() %>%
  select(bioguide = bioguideId, name, partyName, state)

# Members returned by the API that have no row in combined_df
missing_in_combined <- members_df %>%
  anti_join(combined_df, by = "bioguide") %>%
  rename(last_first = name, party = partyName)

# Append those members and sort the full table by name
Congress_handles <- combined_df %>%
  bind_rows(missing_in_combined) %>%
  arrange(last_first)

# Rows that still have no Twitter handle
NoTwit <- filter(Congress_handles, is.na(twitter))

# Print the names of those rows to the screen, copy them into ChatGPT,
# and search for twitter handles. Then, verify each handle
# individually

writeLines(NoTwit[["last_first"]])

# Fill in manually gathered Twitter handles.
# Names are normalized to Unicode NFC first so that accented names compare
# equal to the literals below. A name found in the lookup gets the listed
# handle (replacing any existing value); otherwise the Bioguide-ID lookup
# is tried; otherwise the existing handle is kept.
Congress_handles <- local({
  handle_by_name <- c(
    "Alsobrooks, Angela" = "Sen_Alsobrooks",
    "Cherfilus-McCormick, Sheila" = "CongresswomanSC",
    "Cisneros, Gilbert" = "RepGilCisneros",
    "Connolly, Gerald E." = "GerryConnolly",
    "Fetterman, John" = "SenFettermanPA",
    "Fine, Randy" = "RepFine",
    "Gooden, Lance" = "Lancegooden",
    "Graham, Lindsey" = "LindseyGrahamSC",
    "Grijalva, Raúl M." = "RepRaulGrijalva",
    "Hamadeh, Abraham J." = "RepAbeHamadeh",
    "Husted, Jon" = "SenJonHusted",
    "Jackson, Jonathan" = "rep_jackson",
    "Justice, Jim" = "JimJustice_WV",
    "Kaine, Timothy" = "timkaine",
    "Kim, Andy" = "AndyKimNJ",
    "McBride, Sarah" = "Rep_McBride",
    "McCormick, Dave" = "SenMcCormickPA",
    "Messmer, Mark" = "RepMessmer",
    "Min, Dave" = "CongressMin",
    "Moody, Ashley" = "SenAshleyMoody",
    "Patronis, Jimmy" = "JimmyPatronis",
    "Randall, Emily" = "repemilyrandall",
    "Riley, Josh" = "RepRileyNY",
    "Rubio, Marco" = "marcorubio",
    "Rulli, Michael A." = "RepMichaelRulli",
    "Schiff, Adam" = "SenAdamSchiff",
    "Sheehy, Tim" = "TimSheehyMT",
    "Smith, Christopher" = "",
    "Tran, Derek" = "RepDerekTranCA",
    "Turner, Sylvester" = "SylvesterTurner",
    "Vance, J. D." = "JDVance",
    "Waltz, Michael" = "michaelgwaltz"
  )

  # Fallback keyed by Bioguide ID, used when the name did not match
  handle_by_bioguide <- c("G000551" = "RepRaulGrijalva")

  Congress_handles %>%
    mutate(
      last_first = stri_trans_nfc(last_first),
      # Unmatched lookups give NA, so coalesce() falls through to the
      # next source and finally to the existing handle
      twitter = coalesce(
        unname(handle_by_name[last_first]),
        unname(handle_by_bioguide[bioguide]),
        twitter
      )
    )
})

# Members who have died stay in the data. Members who left Congress
# for appointed positions in the executive administration are
# dropped, as is the one member who is not on X/Twitter
# (Rep. Chris Smith, of New Jersey).

Congress_handles <- filter(
  Congress_handles,
  !(last_first %in% c(
    "Smith, Christopher",
    "Rubio, Marco",
    "Vance, J. D.",
    "Waltz, Michael"
  ))
)

# Abbreviate long state names from Congress.gov API.
# Rows added from the API carry full names ("Arizona"), while the rest of
# the table already uses two-letter codes. Rather than listing only the
# states that happened to be missing, translate any full name using
# R's built-in state.name / state.abb vectors (the 50 states) plus the
# postal codes for DC and the territories. Values that are not a known
# full name (including existing two-letter codes and NA) are left
# unchanged.
Congress_handles <- local({
  other_codes <- c(
    "District of Columbia" = "DC",
    "Puerto Rico" = "PR",
    "Guam" = "GU",
    "American Samoa" = "AS",
    "Northern Mariana Islands" = "MP",
    "Virgin Islands" = "VI"
  )
  long_names <- c(datasets::state.name, names(other_codes))
  short_codes <- c(datasets::state.abb, unname(other_codes))

  Congress_handles %>%
    # match() gives NA for anything that is not a full name, and
    # coalesce() then keeps the original value
    mutate(state = coalesce(short_codes[match(state, long_names)], state))
})

# Supply missing gender, chamber and district data for select cases

# Rows with no chamber value are the ones that need details filled in
MissingDetails <- Congress_handles %>%
  filter(is.na(chamber))

write_csv(MissingDetails, "MissingDetails.csv")

# Manually added missing data using Excel,
# using 999 for missing values, saved the .csv,
# then pasted the .csv data into the code below

# Store the CSV-formatted text in a string

csv_text <- '
last_first,bioguide,first_name,last_name,official_full,gender,party,state,district,chamber,term_start,term_end,twitter,facebook,youtube
"Connolly, Gerald E.",C001078,Gerald,Connolly,"Connolly, Gerald E.",M,Democratic,VA,11,House,NA,NA,GerryConnolly,NA,NA
"Fine, Randy",F000484,Randy,Fine,"Fine, Randy",M,Republican,FL,6,House,NA,NA,RepFine,NA,NA
"Grijalva, Raúl M.",G000551,Raul,Grijalva,"Grijalva, Raúl M.",M,Democratic,AZ,7,House,NA,NA,RepRaulGrijalva,NA,NA
"Hamadeh, Abraham J.",H001098,Abraham,Hamadeh,"Hamadeh, Abraham J.",M,Republican,AZ,8,House,NA,NA,RepAbeHamadeh,NA,NA
"Husted, Jon",H001104,Jon,Husted,"Husted, Jon",M,Republican,OH,999,Senate,NA,NA,SenJonHusted,NA,NA
"Min, Dave",M001241,Dave,Min,"Min, Dave",M,Democratic,CA,47,House,NA,NA,CongressMin,NA,NA
"Moody, Ashley",M001244,Ashley,Moody,"Moody, Ashley",F,Republican,FL,999,Senate,NA,NA,SenAshleyMoody,NA,NA
"Patronis, Jimmy",P000622,Jimmy,Patronis,"Patronis, Jimmy",M,Republican,FL,1,House,NA,NA,JimmyPatronis,NA,NA
"Randall, Emily",R000621,Emily,Randall,"Randall, Emily",F,Democratic,WA,6,House,NA,NA,repemilyrandall,NA,NA
"Rulli, Michael A.",R000619,Michael,Rulli,"Rulli, Michael A.",M,Republican,OH,6,House,NA,NA,RepMichaelRulli,NA,NA
"Sheehy, Tim",S001232,Tim,Sheehy,"Sheehy, Tim",M,Republican,MT,999,Senate,NA,NA,TimSheehyMT,NA,NA
"Tran, Derek",T000491,Derek,Tran,"Tran, Derek",M,Democratic,CA,45,House,NA,NA,RepDerekTranCA,NA,NA
"Turner, Sylvester",T000489,Sylvester,Turner,"Turner, Sylvester",M,Democratic,TX,18,House,NA,NA,SylvesterTurner,NA,NA
'

# Read the CSV text into a data frame.
# The `text` argument is used instead of passing textConnection(csv_text):
# read.csv() then opens and closes its own connection, so no open
# connection is left behind.
DetailsToAdd <- read.csv(text = csv_text, stringsAsFactors = FALSE)

# Overwrite the matching rows (matched on bioguide) with the manually
# completed ones, using only rows where gender, district and chamber were
# all filled in, then turn the 999 placeholder for district back into NA
Congress_handles <- Congress_handles %>%
  rows_update(
    DetailsToAdd %>%
      filter(!is.na(gender) & !is.na(district) & !is.na(chamber)),
    by = "bioguide") %>%
  mutate(district = ifelse(district == 999, NA, district))

# Drop the intermediate objects created in the steps above
rm(list = c(
  "combined_df",
  "data",
  "members_all",
  "members_df",
  "missing_in_combined",
  "NoTwit",
  "res",
  "api_key",
  "base_url",
  "congress_number",
  "offset",
  "page_size",
  "url",
  "DetailsToAdd",
  "MissingDetails",
  "csv_text"
))

# Export the completed table
write_csv(x = Congress_handles, file = "Congress_handles.csv")

# This part was run after posts for January through May had been
# collected. It lists the handles in Congress_handles.csv that
# have no posts in the collected data.

library(tidyverse)

# Member table, with the handle column renamed to Author and a
# lower-cased copy added for matching
MemberDetails <- read_csv("Congress_handles.csv") %>%
  mutate(author_lower = str_to_lower(twitter)) %>%
  rename(Author = twitter)

# Collected posts
DataToCheck <- readRDS("Latest119thData.RDS")

# Number of posts per lower-cased author
Counts <- DataToCheck %>%
  group_by(author_lower) %>%
  tally(name = "Count")

# Members whose lower-cased handle has no row in Counts
MissingFromCounts <- anti_join(MemberDetails, Counts, by = "author_lower")

# Print the result to the console as pipe-delimited text
write.table(
  select(MissingFromCounts, last_first, author_lower, chamber),
  sep = "|",
  quote = FALSE,
  row.names = FALSE
)

# Results of manual check:

# last_first    author_lower    chamber Update  Notes
# Bonamici, Suzanne repbonamici House   None    On Bluesky, not X. At repbonamici.bsky.social‬
# Budd, Ted reptedbudd  Senate  SenTedBuddNC    
# Clark, Katherine  repkclark   House   WhipKClark  
# Edwards, Charles (Chuck)  repedwards  House   RepChuckEdwards 
# Emmer, Tom    reptomemmer House   GOPMajorityWhip 
# Hunt, Wesley  repwesleyhunt   House   RepWPH  
# Johnson, Mike repmikejohnson  House   SpeakerJohnson  
# Malliotakis, Nicole   repmalliotakis  House   RepNicole   
# Marshall, Roger   senatormarshall Senate  RogerMarshallMD 
# Moylan, James (Jim)   jmoylanforguam  House   RepMoylan   X account on House page doesn't exist. Uses @RepMoylan instead.
# Murphy, Christopher   senmurphyoffice Senate  ChrisMurphyCT   No X account listed on Senate page. This is the one he uses, though.
# Pou, Nellie   caohouse    House   RepNellie   
# Randall, Emily    repemilyrandall House   EmilyRandallWA  Mainly on Bluesky, at repemilyrandall.bsky.social
# Welch, Peter  peterwelch  Senate  SenPeterWelch   

# Created a one-time Brandwatch query that searched the updated handles
# for posts since January 1, 2025
# Downloaded those posts as 20250622HandlesUpdate.csv

# Update X handles in Congress_handles.csv

# Replace the handles found to be outdated in the manual check with the
# accounts the members actually use. Matching is done on a lower-cased
# copy of the handle so that capitalization differences do not matter.
# Handles that are not listed are kept exactly as stored: the fallback is
# the original `twitter` value, not the lower-cased copy, so the column
# does not end up mixing lower-cased and original-case handles.
Congress_handles <- read_csv("Congress_handles.csv") %>%
  mutate(twitter_lower = str_to_lower(twitter)) %>%
  mutate(twitter = case_when(twitter_lower == "reptedbudd" ~ "SenTedBuddNC",
                             twitter_lower == "repkclark" ~ "WhipKClark",
                             twitter_lower == "repedwards" ~ "RepChuckEdwards",
                             twitter_lower == "reptomemmer" ~ "GOPMajorityWhip",
                             twitter_lower == "repwesleyhunt" ~ "RepWPH",
                             twitter_lower == "repmikejohnson" ~ "SpeakerJohnson",
                             twitter_lower == "repmalliotakis" ~ "RepNicole",
                             twitter_lower == "senatormarshall" ~ "RogerMarshallMD",
                             twitter_lower == "jmoylanforguam" ~ "RepMoylan",
                             twitter_lower == "senmurphyoffice" ~ "ChrisMurphyCT",
                             twitter_lower == "caohouse" ~ "RepNellie",
                             twitter_lower == "repemilyrandall" ~ "EmilyRandallWA",
                             twitter_lower == "peterwelch" ~ "SenPeterWelch",
                             TRUE ~ twitter)) %>%
  # The lower-cased helper column is only needed for matching
  select(-c("twitter_lower"))

# Use "Democrat" as the single party label: rows that say
# "Democratic" are rewritten, all other values are left alone

Congress_handles <- mutate(
  Congress_handles,
  party = if_else(party == "Democratic", "Democrat", party)
)

# Write the final table back out as .csv

write_csv(x = Congress_handles, file = "Congress_handles.csv")
Extracting Twitter Handles for members of Congress

Ken Blake

2025-06-24