This is a list of all United States presidents. Note: Multiple entries appear for a president whenever there was a change in the office of vice president.

#this part was adapted from Jared Lander's 'R for Everyone'
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(XML)
## Warning: package 'XML' was built under R version 3.2.2
url <- "http://www.loc.gov/rr/print/list/057_chron.html"
pres <- readHTMLTable(url, which=3, as.data.frame = TRUE, skip.rows = 1, header = TRUE, stringAsFactors = FALSE)
head(pres)
##        YEAR         PRESIDENT
## 1 1789-1797 George Washington
## 2 1797-1801        John Adams
## 3 1801-1805  Thomas Jefferson
## 4 1805-1809  Thomas Jefferson
## 5 1809-1812     James Madison
## 6 1812-1813     James Madison
##                                       FIRST LADY   VICE PRESIDENT
## 1                              Martha Washington       John Adams
## 2                                  Abigail Adams Thomas Jefferson
## 3 Martha Wayles Skelton Jefferson\n   (no image)       Aaron Burr
## 4 Martha Wayles Skelton Jefferson\n   (no image)   George Clinton
## 5                                 Dolley Madison   George Clinton
## 6                                 Dolley Madison    office vacant

Transforming the data:

#remove rows that are of no value
pres <- pres[c(2, 1)]
pres <- pres[-c(65:68), ]

#split up the years into a start and end column
pres2 <- pres %>% 
  separate(YEAR, c("start", "end"), 4)
#get rid of non digits
pres2$end <- str_extract(pres2$end, "\\d{4}")
#change numbers to numeric data types
pres2$start <- as.numeric(pres2$start)
pres2$end <- as.numeric(pres2$end)

#start and end years for each president
pres3 <- as.data.frame(pres2 %>%
  group_by(PRESIDENT) %>%
  mutate(START=min(start, na.rm = TRUE)) %>%
  mutate(END=max(end, na.rm = TRUE)))

#if the end year is NA change it to the start year
pres3$END <- with(pres3, ifelse(is.na(END), START, END))
                  
#change OBAMAS end date
pres3$END[64] <- 2017

#remove duplicate row, add column for term length
pres3 <- pres3 %>% 
  unique() %>%
  mutate(LENGTH=(END-START))

#remove columns
pres3 <- pres3[ ,c(1, 4:6)]

#add Bernie
PRESIDENT <- "Bernie Sanders"
START <- as.integer(2017)
END <- as.integer(2025)
LENGTH <- as.integer(8)
bernie <- data.frame(PRESIDENT, START, END, LENGTH)
pres3 <- rbind(pres3, bernie)

#fix grover cleveland's length
pres3$LENGTH[22] <- 8

Here is the transformed data:

head(pres3)
##           PRESIDENT START  END LENGTH
## 1 George Washington  1789 1797      8
## 2        John Adams  1797 1801      4
## 3  Thomas Jefferson  1801 1809      8
## 4  Thomas Jefferson  1801 1809      8
## 5     James Madison  1809 1817      8
## 6     James Madison  1809 1817      8
tail(pres3)
##         PRESIDENT START  END LENGTH
## 60  Ronald Reagan  1981 1989      8
## 61    George Bush  1989 1993      4
## 62   Bill Clinton  1993 2001      8
## 63 George W. Bush  2001 2009      8
## 64   Barack Obama  2009 2017      8
## 65 Bernie Sanders  2017 2025      8

Now some data analysis… All President’s who served less than a full term in office:

p <- pres3[pres3$LENGTH < 4, ]
p
##                 PRESIDENT START  END LENGTH
## 14 William Henry Harrison  1841 1841      0
## 17         Zachary Taylor  1849 1850      1
## 18       Millard Fillmore  1850 1853      3
## 29      James A. Garfield  1881 1881      0
## 43      Warren G. Harding  1921 1923      2
## 53        John F. Kennedy  1961 1963      2
## 58         Gerald R. Ford  1974 1977      3