This is a list of all United States presidents. Note: Multiple entries appear for a president whenever there was a change in the office of vice president.
#this part was adapted from Jared Lander's 'R for Everyone'
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(XML)
## Warning: package 'XML' was built under R version 3.2.2
url <- "http://www.loc.gov/rr/print/list/057_chron.html"
pres <- readHTMLTable(url, which=3, as.data.frame = TRUE, skip.rows = 1, header = TRUE, stringAsFactors = FALSE)
head(pres)
## YEAR PRESIDENT
## 1 1789-1797 George Washington
## 2 1797-1801 John Adams
## 3 1801-1805 Thomas Jefferson
## 4 1805-1809 Thomas Jefferson
## 5 1809-1812 James Madison
## 6 1812-1813 James Madison
## FIRST LADY VICE PRESIDENT
## 1 Martha Washington John Adams
## 2 Abigail Adams Thomas Jefferson
## 3 Martha Wayles Skelton Jefferson\n (no image) Aaron Burr
## 4 Martha Wayles Skelton Jefferson\n (no image) George Clinton
## 5 Dolley Madison George Clinton
## 6 Dolley Madison office vacant
Transforming the data:
#remove rows that are of no value
pres <- pres[c(2, 1)]
pres <- pres[-c(65:68), ]
#split up the years into a start and end column
pres2 <- pres %>%
separate(YEAR, c("start", "end"), 4)
#get rid of non digits
pres2$end <- str_extract(pres2$end, "\\d{4}")
#change numbers to numeric data types
pres2$start <- as.numeric(pres2$start)
pres2$end <- as.numeric(pres2$end)
#start and end years for each president
pres3 <- as.data.frame(pres2 %>%
group_by(PRESIDENT) %>%
mutate(START=min(start, na.rm = TRUE)) %>%
mutate(END=max(end, na.rm = TRUE)))
#if the end year is NA change it to the start year
pres3$END <- with(pres3, ifelse(is.na(END), START, END))
#change OBAMAS end date
pres3$END[64] <- 2017
#remove duplicate row, add column for term length
pres3 <- pres3 %>%
unique() %>%
mutate(LENGTH=(END-START))
#remove columns
pres3 <- pres3[ ,c(1, 4:6)]
#add Bernie
PRESIDENT <- "Bernie Sanders"
START <- as.integer(2017)
END <- as.integer(2025)
LENGTH <- as.integer(8)
bernie <- data.frame(PRESIDENT, START, END, LENGTH)
pres3 <- rbind(pres3, bernie)
#fix grover cleveland's length
pres3$LENGTH[22] <- 8
Here is the transformed data:
head(pres3)
## PRESIDENT START END LENGTH
## 1 George Washington 1789 1797 8
## 2 John Adams 1797 1801 4
## 3 Thomas Jefferson 1801 1809 8
## 4 Thomas Jefferson 1801 1809 8
## 5 James Madison 1809 1817 8
## 6 James Madison 1809 1817 8
tail(pres3)
## PRESIDENT START END LENGTH
## 60 Ronald Reagan 1981 1989 8
## 61 George Bush 1989 1993 4
## 62 Bill Clinton 1993 2001 8
## 63 George W. Bush 2001 2009 8
## 64 Barack Obama 2009 2017 8
## 65 Bernie Sanders 2017 2025 8
Now some data analysis… All President’s who served less than a full term in office:
p <- pres3[pres3$LENGTH < 4, ]
p
## PRESIDENT START END LENGTH
## 14 William Henry Harrison 1841 1841 0
## 17 Zachary Taylor 1849 1850 1
## 18 Millard Fillmore 1850 1853 3
## 29 James A. Garfield 1881 1881 0
## 43 Warren G. Harding 1921 1923 2
## 53 John F. Kennedy 1961 1963 2
## 58 Gerald R. Ford 1974 1977 3