library(tidyverse)
library(ProPublicaR)
library(knitr)
library(glue)
library(janitor)
library(ggthemes)
library(here)
library(tidyboot)
library(broom)

# Document-wide knitr chunk defaults: echo the code, suppress messages and
# warnings in the rendered output, and cache chunk results.
opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  error = FALSE,
  cache = TRUE,
  tidy = FALSE
)

# Silence dplyr's "summarise() has grouped output by ..." notices.
options(dplyr.summarise.inform = FALSE)

# Use the ggthemes "few" theme with a larger base font for every plot.
theme_set(theme_few(base_size = 18))

Set up our timeline of interest: George H. W. Bush (1989) through Trump (2020), i.e. the 101st–116th Congresses. We’ll want to know which party holds the presidency and which party controls the House during each Congress.

# Every year/month combination from 1989 through 2020. `month` is stored as a
# zero-padded two-character string ("01" ... "12"). sprintf() yields a plain
# character vector, rather than mixing glue and character vectors inside
# if_else().
timeline <- expand_grid(year = 1989:2020, month = 1:12) %>%
  mutate(month = sprintf("%02d", month))

# Party of the sitting president for each Congress, 101st through 116th.
# The run lengths below must sum to 16, one entry per Congress.
president <- tibble(
  congress = 101:116,
  president = rep(
    c("republican", "democratic", "republican", "democratic", "republican"),
    times = c(2, 4, 4, 4, 2)
  )
)

# Party controlling the House for each Congress, 101st through 116th.
# The run lengths below must sum to 16, one entry per Congress.
house <- tibble(
  congress = 101:116,
  house = rep(
    c("democratic", "republican", "democratic", "republican", "democratic"),
    times = c(3, 6, 2, 4, 1)
  )
)

Helper functions that get all votes cast in a given year/month and the party of the bill’s sponsor

# Fetch the votes the ProPublica API reports for one chamber in a given
# year/month and reshape them into a wide tibble with one row per vote.
#
# year, month: forwarded unchanged to get_votes_by_date().
# chamber:     chamber name forwarded to the API; defaults to "house".
#
# Returns a tibble with snake_case column names (via clean_names()) plus a
# `bill` column identifying each vote record within the response, or
# invisible NULL when the response contains no votes. The API key is read
# from the global `KEY`.
process_votes <- function(year, month, chamber = "house") {
  response <- get_votes_by_date(chamber, year, month, myAPI_Key = KEY)
  vote_records <- pluck(response, "results", "votes")

  # Flatten each nested vote record into name/value pairs and stack them,
  # tagging every pair with the record it came from.
  long_votes <- vote_records %>%
    map(function(record) enframe(unlist(record))) %>%
    bind_rows(.id = "bill")

  if (nrow(long_votes) == 0) {
    return(invisible(NULL))
  }

  long_votes %>%
    pivot_wider(names_from = "name", values_from = "value") %>%
    clean_names()
}

# Look up a member of Congress through the ProPublica API and return the value
# of their `current_party` field.
#
# member_id: member identifier forwarded to get_congress_member().
#
# Returns a length-one character vector. NA_character_ is returned when the
# response has no results or no `current_party` field, so callers that need
# exactly one value per member (e.g. a row-wise mutate) do not fail on a
# zero-length result. The API key is read from the global `KEY`.
get_party <- function(member_id) {
  member_fields <- get_congress_member(member_id, myAPI_Key = KEY) %>%
    pluck("results") %>%
    map(~ .x %>% unlist %>% enframe)

  # No result entries at all: nothing to look up.
  if (length(member_fields) == 0) {
    return(NA_character_)
  }

  party <- member_fields %>%
    first() %>%
    filter(name == "current_party") %>%
    pull(value)

  if (length(party) == 0) {
    return(NA_character_)
  }

  as.character(party[[1]])
}

Get all the data using the ProPublica API. This code is slow so we’ll write the results to a csv and read them back in on subsequent runs.

# Download the votes for every year/month in `timeline` (one API call per
# month) and stack them into a single table. The year/month are copied into
# `*_original` columns so they survive as grouping keys after the per-month
# data is nested away. The result stays grouped by those two columns.
all_votes <- timeline %>%
  mutate(year_original = year, month_original = month) %>%
  group_by(year_original, month_original) %>%
  nest() %>%
  mutate(votes = map(data, ~ process_votes(pull(.x, year), pull(.x, month)))) %>%
  select(-data) %>%
  # Drop months for which process_votes() returned NULL. The test has to be
  # applied to each element: is.null() on the list-column itself is always
  # FALSE, which made the original filter a no-op.
  filter(!map_lgl(votes, is.null)) %>%
  unnest(cols = c(votes)) %>%
  # Keep only votes that are attached to a bill with a known sponsor.
  filter(!is.na(bill_sponsor_id))

# One row per distinct bill sponsor with the party returned by get_party().
# map_chr() makes one API call per sponsor and requires exactly one string
# back for each; unlike rowwise() + mutate(), it leaves `sponsors` as an
# ordinary (non-rowwise) tibble.
sponsors <- all_votes %>%
  ungroup() %>%
  distinct(bill_sponsor_id) %>%
  mutate(bill_party = map_chr(bill_sponsor_id, get_party))

# Reshape the raw votes into one row per vote x voting party, with the
# yes/no/present/not_voting counts as columns and three derived factors:
#   sponsor - "same" if the bill's sponsor belongs to the voting party,
#             otherwise "different"
#   status  - "majority" if the voting party controls the House in that
#             Congress (per `house`), otherwise "minority"
#   result  - vote result with "Passed"/"Agreed to" collapsed to "passed" and
#             "Failed" renamed to "failed"
tidy_votes <- all_votes %>%
  ungroup() %>%
  rename(year = year_original, month = month_original) %>%
  left_join(sponsors, by = "bill_sponsor_id") %>%
  select(year, month, congress, bill_party, bill, result, democratic_yes,
         democratic_no, democratic_present, democratic_not_voting,
         republican_yes, republican_no, republican_present,
         republican_not_voting) %>%
  # Long format: one row per vote x party x count type.
  pivot_longer(cols = c(democratic_yes, democratic_no, democratic_present,
                        democratic_not_voting, republican_yes, republican_no,
                        republican_present, republican_not_voting),
               names_to = "measure") %>%
  # Split on the first separator only, so "not_voting" stays in one piece.
  separate(measure, into = c("party", "type"), extra = "merge") %>%
  # Only bills sponsored by a Democrat or a Republican are analysed.
  filter(bill_party %in% c("D", "R")) %>%
  # Spell out the levels so the D/R -> label mapping does not depend on both
  # codes being present in the data, and so `bill_party` and `party` are
  # guaranteed to share a level set for the comparison below.
  mutate(bill_party = factor(bill_party, levels = c("D", "R"),
                             labels = c("democratic", "republican")),
         party = factor(party, levels = c("democratic", "republican")),
         value = as.numeric(value)) %>%
  mutate(sponsor = if_else(bill_party == party, "same", "different")) %>%
  # Back to one row per vote x party, with one column per count type.
  pivot_wider(id_cols = c(year, month, congress, bill, party, sponsor, result),
              names_from = type, values_from = value) %>%
  mutate(congress = as.numeric(congress)) %>%
  left_join(house, by = "congress") %>%
  mutate(party = factor(party, levels = c("republican", "democratic")),
         house = factor(house, levels = c("republican", "democratic")),
         status = factor(if_else(party == house, "majority", "minority"))) %>%
  mutate(result = fct_collapse(result, passed = c("Passed", "Agreed to"),
                               failed = "Failed"))

# Cache the slow API results on disk ...
write_csv(sponsors, here("data/sponsors.csv"))
write_csv(tidy_votes, here("data/votes.csv"))

# ... and load them back. Factor information is lost in the csv round trip,
# so the level order of `party` and `house` is restored after reading.
sponsors <- read_csv(here("data/sponsors.csv"))
tidy_votes <- read_csv(here("data/votes.csv")) %>%
  mutate(across(c(party, house),
                ~ factor(.x, levels = c("republican", "democratic"))))

Get the average proportion of yes votes by Congress, House control, majority/minority status, party, and whether the bill’s sponsor belongs to the same party.

# Per-vote share of a party's members voting yes (and no), then the
# bootstrapped mean yes-share for every congress x house x status x party x
# sponsor cell. The point estimate from tidyboot_mean() is renamed to `yes`.
mean_votes <- tidy_votes %>%
  mutate(members = yes + no + present + not_voting,
         vote_yes = yes / members,
         vote_no = no / members) %>%
  select(-members) %>%
  group_by(congress, house, status, party, sponsor) %>%
  tidyboot_mean(vote_yes, na.rm = TRUE) %>%
  rename(yes = empirical_stat)

# Average of the per-congress means for each party x sponsor combination;
# drawn as the dashed reference lines in the plot below.
means <- summarise(
  group_by(mean_votes, party, sponsor),
  yes = mean(yes)
)
# Mean yes-share per Congress, one panel per voting party. Point colour shows
# which party controls the House, point size the number of votes in the cell,
# and transparency whether the sponsor is from the same party. Dashed lines
# mark the averages stored in `means`.
ggplot(
  mean_votes,
  aes(x = congress, y = yes, ymin = ci_lower, ymax = ci_upper,
      alpha = sponsor)
) +
  geom_pointrange(aes(color = house, size = n)) +
  geom_line() +
  geom_hline(aes(yintercept = yes, alpha = sponsor), data = means,
             linetype = "dashed") +
  facet_wrap(~ party) +
  scale_color_brewer(palette = "Set1") +
  scale_size_area(max_size = 1.5) +
  scale_alpha_discrete(range = c(.3, .8)) +
  labs(y = "proportion party members voting yes")

Proportion of party members voting yes. Circles are scaled to number of events, lines indicate 95% confidence intervals computed by non-parametric bootstrapping.

Statistical model

# Binomial regression of yes vs. all other votes (no, present, not voting) on
# voting party, sponsor match, majority status, and all their interactions.
model <- glm(
  cbind(yes, no + present + not_voting) ~ party * sponsor * status,
  family = "binomial",
  data = tidy_votes
)

# Coefficient table: keep term, estimate and a formatted p-value.
model %>%
  tidy() %>%
  select(-c(std.error, statistic)) %>%
  mutate(p.value = papaja::printp(p.value)) %>%
  kable()
| term | estimate | p.value |
|:-----|---------:|:--------|
| (Intercept) | 2.1550206 | < .001 |
| partydemocratic | 0.1136195 | < .001 |
| sponsorsame | -1.6245916 | < .001 |
| statusminority | -2.2307577 | < .001 |
| partydemocratic:sponsorsame | 0.4478706 | < .001 |
| partydemocratic:statusminority | -0.0222449 | .185 |
| sponsorsame:statusminority | 3.1952534 | < .001 |
| partydemocratic:sponsorsame:statusminority | -0.4552051 | < .001 |

Now try splitting by whether the bill passed

# Same bootstrapped mean yes-share as before, but additionally split by the
# vote's result (passed / failed).
mean_votes_result <- tidy_votes %>%
  mutate(members = yes + no + present + not_voting,
         vote_yes = yes / members,
         vote_no = no / members) %>%
  select(-members) %>%
  group_by(congress, house, status, party, sponsor, result) %>%
  tidyboot_mean(vote_yes, na.rm = TRUE) %>%
  rename(yes = empirical_stat)

# Average of the per-congress means for each party x sponsor x result
# combination; drawn as dashed reference lines in the plot below.
#
# One row per combination is all geom_hline() needs. Crossing these rows with
# every congress (as was done previously) draws each semi-transparent line
# once per congress on top of itself, which distorts the alpha encoding and
# is inconsistent with how `means` is built for the first plot.
means_result <- mean_votes_result %>%
  group_by(party, sponsor, result) %>%
  summarise(yes = mean(yes))
# Mean yes-share per Congress, with vote result in rows and voting party in
# columns. Point colour shows which party controls the House, point size the
# number of votes in the cell, and transparency whether the sponsor is from
# the same party. Dashed lines mark the averages stored in `means_result`.
ggplot(
  mean_votes_result,
  aes(x = congress, y = yes, ymin = ci_lower, ymax = ci_upper,
      alpha = sponsor)
) +
  geom_pointrange(aes(color = house, size = n)) +
  geom_line() +
  geom_hline(aes(yintercept = yes, alpha = sponsor), data = means_result,
             linetype = "dashed") +
  facet_grid(result ~ party) +
  scale_color_brewer(palette = "Set1") +
  scale_size_area(max_size = 1.5) +
  scale_alpha_discrete(range = c(.3, .8)) +
  labs(y = "proportion party members voting yes")

Statistical model with result included

# Binomial regression of yes vs. all other votes on voting party, sponsor
# match, majority status, vote result, and all their interactions.
model_result <- glm(
  cbind(yes, no + present + not_voting) ~ party * sponsor * status * result,
  family = "binomial",
  data = tidy_votes
)

# Coefficient table: keep term, estimate and a formatted p-value.
model_result %>%
  tidy() %>%
  select(-c(std.error, statistic)) %>%
  mutate(p.value = papaja::printp(p.value)) %>%
  kable()
| term | estimate | p.value |
|:-----|---------:|:--------|
| (Intercept) | -0.3653180 | < .001 |
| partydemocratic | -1.8095628 | < .001 |
| sponsorsame | -1.1835346 | < .001 |
| statusminority | 1.3847508 | < .001 |
| resultpassed | 2.6942148 | < .001 |
| partydemocratic:sponsorsame | 1.3017769 | < .001 |
| partydemocratic:statusminority | 1.3740031 | < .001 |
| sponsorsame:statusminority | 2.7233064 | < .001 |
| partydemocratic:resultpassed | 1.9910770 | < .001 |
| sponsorsame:resultpassed | 1.1423709 | < .001 |
| statusminority:resultpassed | -4.0744657 | < .001 |
| partydemocratic:sponsorsame:statusminority | -3.6900918 | < .001 |
| partydemocratic:sponsorsame:resultpassed | -1.3081515 | < .001 |
| partydemocratic:statusminority:resultpassed | -1.5159839 | < .001 |
| sponsorsame:statusminority:resultpassed | -0.8434184 | < .001 |
| partydemocratic:sponsorsame:statusminority:resultpassed | 3.8474441 | < .001 |