library(tidyverse)
library(ProPublicaR)
library(knitr)
library(glue)
library(janitor)
library(ggthemes)
library(here)
library(tidyboot)
library(broom)
# Global knitr chunk options: show code, hide messages/warnings, stop on
# errors, cache chunk results, and leave code formatting untouched.
knitr::opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = TRUE, tidy = FALSE
)
# Silence dplyr's "summarise() has grouped output by ..." messages.
options(dplyr.summarise.inform = FALSE)
# Default ggplot theme for every figure in this document.
theme_set(theme_few(base_size = 18))
Set up our timeline of interest: George H. W. Bush (1989) through Trump (2020), i.e. the 101st–116th Congresses. We’ll want to know which party holds the presidency and which party controls the House during each Congress.
# One row per calendar month from 1989 through 2020. `month` is stored as a
# zero-padded two-character string ("01".."12"); sprintf() yields a plain
# character vector, avoiding the glue/character type mix of an if_else().
timeline <- expand_grid(year = 1989:2020, month = 1:12) %>%
  mutate(month = sprintf("%02d", month))
# Party holding the presidency for each Congress, 101st through 116th.
# The run lengths (2, 4, 4, 4, 2) sum to the 16 Congresses in 101:116.
president <- tibble(
  congress = 101:116,
  president = c(
    rep("republican", 2),
    rep("democratic", 4),
    rep("republican", 4),
    rep("democratic", 4),
    rep("republican", 2)
  )
)
# Party controlling the House for each Congress, 101st through 116th.
# The run lengths (3, 6, 2, 4, 1) sum to the 16 Congresses in 101:116.
house <- tibble(
  congress = 101:116,
  house = c(
    rep("democratic", 3),
    rep("republican", 6),
    rep("democratic", 2),
    rep("republican", 4),
    "democratic"
  )
)
Helper functions that get all votes cast in a given year/month and the party of the bill’s sponsor
# Fetch every vote cast in one year/month and return them as a wide tibble.
#
# Arguments:
#   year, month  passed straight through to get_votes_by_date().
#   chamber      chamber to query; defaults to "house".
# Returns: a tibble with one row per vote (identified by `bill`, the position
#   of the vote in the API response) and one snake_case column per flattened
#   field, or NULL when the response contains no votes.
# Relies on a global `KEY` holding the ProPublica API key; it is not defined
# in this part of the file.
process_votes <- function(year, month, chamber = "house") {
  votes <- get_votes_by_date(chamber, year, month, myAPI_Key = KEY) %>%
    pluck("results") %>%
    pluck("votes") %>%
    # Flatten each (nested) vote record into name/value pairs.
    map(~ enframe(unlist(.x))) %>%
    bind_rows(.id = "bill")

  if (nrow(votes) == 0) {
    return(NULL)
  }

  votes %>%
    pivot_wider(names_from = "name", values_from = "value") %>%
    clean_names()
}
# Look up a member's `current_party` value through the ProPublica API.
#
# Arguments:
#   member_id  member identifier passed to get_congress_member().
# Returns: the value(s) of the "current_party" field from the first result.
# Relies on a global `KEY` holding the ProPublica API key; it is not defined
# in this part of the file.
get_party <- function(member_id) {
  get_congress_member(member_id, myAPI_Key = KEY) %>%
    pluck("results") %>%
    # Flatten each result record into name/value pairs.
    map(~ enframe(unlist(.x))) %>%
    first() %>%
    filter(name == "current_party") %>%
    pull(value)
}
Get all the data using the ProPublica API. This code is slow so we’ll write the results to a csv and read them back in on subsequent runs.
# Download the votes for every month in `timeline` and stack them.
# The requested year/month are kept as year_original/month_original so they
# survive unnesting alongside whatever columns the API returns.
all_votes <- timeline %>%
  mutate(year_original = year, month_original = month) %>%
  group_by(year_original, month_original) %>%
  nest() %>%
  mutate(
    votes = map(data, ~ process_votes(pull(.x, year), pull(.x, month)))
  ) %>%
  select(-data) %>%
  # Drop months with no votes. is.null() on the list column itself is always
  # FALSE, so each element has to be tested individually.
  filter(!map_lgl(votes, is.null)) %>%
  unnest(cols = c(votes)) %>%
  # Keep only votes that are tied to a bill with a known sponsor.
  filter(!is.na(bill_sponsor_id))
# One row per distinct bill sponsor, with the sponsor's party looked up via
# get_party(). rowwise() makes the lookup run once per sponsor id; the
# row-wise grouping is removed afterwards so it does not leak downstream.
sponsors <- all_votes %>%
  ungroup() %>%
  distinct(bill_sponsor_id) %>%
  rowwise() %>%
  mutate(bill_party = get_party(bill_sponsor_id)) %>%
  ungroup()
# Reshape the raw votes into one row per (vote, voting party) with the counts
# of yes / no / present / not_voting, plus:
#   sponsor  "same" when the voting party is the sponsoring party,
#            "different" otherwise;
#   house    party controlling the House in that Congress;
#   status   "majority" when the voting party controls the House, else
#            "minority";
#   result   collapsed to "passed" ("Passed", "Agreed to") or "failed".
tidy_votes <- all_votes %>%
  ungroup() %>%
  rename(year = year_original, month = month_original) %>%
  left_join(sponsors, by = "bill_sponsor_id") %>%
  select(
    year, month, congress, bill_party, bill, result,
    democratic_yes, democratic_no, democratic_present,
    democratic_not_voting,
    republican_yes, republican_no, republican_present,
    republican_not_voting
  ) %>%
  # The eight <party>_<type> count columns become (measure, value) pairs.
  pivot_longer(
    cols = starts_with(c("democratic_", "republican_")),
    names_to = "measure"
  ) %>%
  # Split on the first separator only so "not_voting" stays in one piece.
  separate(measure, into = c("party", "type"), extra = "merge") %>%
  # Only bills sponsored by a Democrat ("D") or a Republican ("R").
  filter(bill_party %in% c("D", "R")) %>%
  mutate(
    bill_party = factor(
      bill_party,
      levels = c("D", "R"),
      labels = c("democratic", "republican")
    ),
    party = factor(party),
    value = as.numeric(value)
  ) %>%
  # Compare as character so the test does not depend on both factors having
  # identical level sets.
  mutate(
    sponsor = if_else(
      as.character(bill_party) == as.character(party),
      "same", "different"
    )
  ) %>%
  pivot_wider(
    id_cols = c(year, month, congress, bill, party, sponsor, result),
    names_from = type, values_from = value
  ) %>%
  mutate(congress = as.numeric(congress)) %>%
  left_join(house, by = "congress") %>%
  mutate(
    party = factor(party, levels = c("republican", "democratic")),
    house = factor(house, levels = c("republican", "democratic")),
    status = factor(if_else(party == house, "majority", "minority"))
  ) %>%
  mutate(
    result = fct_collapse(
      result,
      passed = c("Passed", "Agreed to"),
      failed = "Failed"
    )
  )
# Save both derived tables as CSV files under the project's data/ directory.
sponsors %>% write_csv(here("data/sponsors.csv"))
tidy_votes %>% write_csv(here("data/votes.csv"))
# Reload the cached tables. Factor levels are not stored in a CSV, so the
# level order of `party` and `house` (republican first) is set again here.
sponsors <- read_csv(here("data/sponsors.csv"))
tidy_votes <- read_csv(here("data/votes.csv")) %>%
  mutate(
    party = factor(party, levels = c("republican", "democratic")),
    house = factor(house, levels = c("republican", "democratic"))
  )
Get average yes votes by session, party, sponsoring party, and vote outcome.
# Share of a party's members voting yes on each vote, averaged with
# tidyboot_mean() within each Congress x House control x majority status x
# party x sponsor cell. The point estimate (`empirical_stat`) is renamed to
# `yes`; the ci_lower / ci_upper / n columns used by the plot come from
# tidyboot_mean().
mean_votes <- tidy_votes %>%
  mutate(vote_yes = yes / (yes + no + present + not_voting)) %>%
  group_by(congress, house, status, party, sponsor) %>%
  tidyboot_mean(vote_yes, na.rm = TRUE) %>%
  rename(yes = empirical_stat)
# Average of the per-Congress yes proportions for each party x sponsor
# combination; drawn as dashed reference lines in the plot.
means <- mean_votes %>%
  group_by(party, sponsor) %>%
  summarise(yes = mean(yes), .groups = "drop")
# Yes-vote proportion by Congress, one panel per voting party. Transparency
# encodes same/different sponsor, colour the party controlling the House,
# point size the number of votes; dashed lines mark the overall means.
mean_votes %>%
  ggplot(
    mapping = aes(
      congress, yes,
      ymin = ci_lower, ymax = ci_upper, alpha = sponsor
    )
  ) +
  geom_pointrange(mapping = aes(colour = house, size = n)) +
  geom_line() +
  geom_hline(
    data = means,
    mapping = aes(yintercept = yes, alpha = sponsor),
    linetype = "dashed"
  ) +
  facet_wrap(vars(party)) +
  scale_colour_brewer(palette = "Set1") +
  scale_size_area(max_size = 1.5) +
  scale_alpha_discrete(range = c(0.3, 0.8)) +
  labs(y = "proportion party members voting yes")
Proportion of party members voting yes. Circles are scaled to number of events, lines indicate 95% confidence intervals computed by non-parametric bootstrapping.
Statistical model
# Binomial regression of yes votes (versus no + present + not voting) on
# voting party, sponsor (same/different), majority status, and all of their
# interactions.
model <- glm(
  cbind(yes, no + present + not_voting) ~ party * sponsor * status,
  family = "binomial",
  data = tidy_votes
)
# Coefficient table: drop the standard error and test statistic, format the
# p-values with papaja::printp(), and render with kable().
model %>%
  tidy() %>%
  select(-c(std.error, statistic)) %>%
  mutate(across(p.value, papaja::printp)) %>%
  kable()
term | estimate | p.value |
---|---|---|
(Intercept) | 2.1550206 | < .001 |
partydemocratic | 0.1136195 | < .001 |
sponsorsame | -1.6245916 | < .001 |
statusminority | -2.2307577 | < .001 |
partydemocratic:sponsorsame | 0.4478706 | < .001 |
partydemocratic:statusminority | -0.0222449 | .185 |
sponsorsame:statusminority | 3.1952534 | < .001 |
partydemocratic:sponsorsame:statusminority | -0.4552051 | < .001 |
Now try splitting by whether the bill passed
# Share of a party's members voting yes on each vote, averaged with
# tidyboot_mean() within each Congress x House control x majority status x
# party x sponsor x result cell. The point estimate (`empirical_stat`) is
# renamed to `yes`.
mean_votes_result <- tidy_votes %>%
  mutate(vote_yes = yes / (yes + no + present + not_voting)) %>%
  group_by(congress, house, status, party, sponsor, result) %>%
  tidyboot_mean(vote_yes, na.rm = TRUE) %>%
  rename(yes = empirical_stat)
# Average of the per-Congress yes proportions for each party x sponsor x
# result combination, repeated for every Congress value present in
# `mean_votes_result`.
means_result <- mean_votes_result %>%
  group_by(party, sponsor, result) %>%
  summarise(yes = mean(yes), .groups = "drop") %>%
  expand_grid(congress = unique(pull(mean_votes_result, congress)))
# Yes-vote proportion by Congress, with vote result in rows and voting party
# in columns. Transparency encodes same/different sponsor, colour the party
# controlling the House, point size the number of votes; dashed lines mark
# the overall means.
mean_votes_result %>%
  ggplot(
    mapping = aes(
      congress, yes,
      ymin = ci_lower, ymax = ci_upper, alpha = sponsor
    )
  ) +
  geom_pointrange(mapping = aes(colour = house, size = n)) +
  geom_line() +
  geom_hline(
    data = means_result,
    mapping = aes(yintercept = yes, alpha = sponsor),
    linetype = "dashed"
  ) +
  facet_grid(rows = vars(result), cols = vars(party)) +
  scale_colour_brewer(palette = "Set1") +
  scale_size_area(max_size = 1.5) +
  scale_alpha_discrete(range = c(0.3, 0.8)) +
  labs(y = "proportion party members voting yes")
Statistical model with result included
# Binomial regression of yes votes (versus no + present + not voting) on
# voting party, sponsor (same/different), majority status, vote result, and
# all of their interactions.
model_result <- glm(
  cbind(yes, no + present + not_voting) ~
    party * sponsor * status * result,
  family = "binomial",
  data = tidy_votes
)
# Coefficient table: drop the standard error and test statistic, format the
# p-values with papaja::printp(), and render with kable().
model_result %>%
  tidy() %>%
  select(-c(std.error, statistic)) %>%
  mutate(across(p.value, papaja::printp)) %>%
  kable()
term | estimate | p.value |
---|---|---|
(Intercept) | -0.3653180 | < .001 |
partydemocratic | -1.8095628 | < .001 |
sponsorsame | -1.1835346 | < .001 |
statusminority | 1.3847508 | < .001 |
resultpassed | 2.6942148 | < .001 |
partydemocratic:sponsorsame | 1.3017769 | < .001 |
partydemocratic:statusminority | 1.3740031 | < .001 |
sponsorsame:statusminority | 2.7233064 | < .001 |
partydemocratic:resultpassed | 1.9910770 | < .001 |
sponsorsame:resultpassed | 1.1423709 | < .001 |
statusminority:resultpassed | -4.0744657 | < .001 |
partydemocratic:sponsorsame:statusminority | -3.6900918 | < .001 |
partydemocratic:sponsorsame:resultpassed | -1.3081515 | < .001 |
partydemocratic:statusminority:resultpassed | -1.5159839 | < .001 |
sponsorsame:statusminority:resultpassed | -0.8434184 | < .001 |
partydemocratic:sponsorsame:statusminority:resultpassed | 3.8474441 | < .001 |