library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## Loading required package: metafor
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following object is masked from 'package:assertthat':
## 
##     has_name
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:rlang':
## 
##     is_logical
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  rvest,
  lubridate
)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
#use ggplot2's black-and-white theme as the default for the plots below
theme_set(theme_bw())


#download the Senate page listing Supreme Court nominations
site = read_html("https://www.senate.gov/legislative/nominations/SupremeCourtNominations1789present.htm")

#text of the 11th cell of every row: parsed below as the confirmation date
confirmations = html_text(html_nodes(site, "td:nth-child(11)"))
#text of the 9th cell of every row: the result code; the first match is dropped
results = html_text(html_nodes(site, "td:nth-child(9)"))[-1]

#one row per nomination: parsed date, its calendar year, and the result code
#dates that do not match the "%b %d, %Y" format become NA
d = tibble(
  confirm_date = as_date(confirmations, format = "%b %d, %Y"),
  year = year(confirm_date),
  result = results
)

#keep only the nominations whose result code is "C" (the confirmed ones)
d_conf = filter(d, result == "C")

#what share of years had at least one confirmation?
#since start; NA years (unparsed dates) are skipped in the numerator too, so
#they are not counted as an extra distinct year
n_distinct(d_conf$year, na.rm = TRUE) /
  length(seq(min(d_conf$year, na.rm = TRUE), max(d_conf$year, na.rm = TRUE)))
## [1] 0.3965517
#since 1900; the window ends at the latest confirmation year in the data
#instead of a fixed 2020, so numerator and denominator cover the same years
(d_conf %>% filter(year >= 1900) %>% pull(year) %>% n_distinct()) /
  length(seq(1900, max(d_conf$year, na.rm = TRUE)))
## [1] 0.3966942
#number of confirmations in every year from the first to the last confirmation
#year, including years with zero; rows with an NA year are ignored
d_conf_year = tibble(
  year = seq(min(d_conf$year, na.rm = TRUE), max(d_conf$year, na.rm = TRUE)),
  count = map_int(year, function(y) sum(d_conf$year == y, na.rm = TRUE))
)

#bar chart of confirmations per year; bars are filled by their count
ggplot(d_conf_year, aes(x = year, y = count, fill = ordered(count))) +
  geom_col() +
  labs(fill = "Confirmations")

#distribution of counts: share of years with 0, 1, 2, ... confirmations
#after_stat(prop) replaces the deprecated ..prop.. notation
d_conf_year %>%
  ggplot(aes(count)) +
  geom_bar(aes(y = after_stat(prop))) +
  scale_x_continuous(breaks = 0:100) +
  scale_y_continuous("Percentage", labels = scales::percent)