Read in the entire NNV data set.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(USAboundaries)
library(sp)
library(broom)
library(ggplot2)
library(classInt)
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(RColorBrewer)
library(knitr)

# Load the objects stored in data/nnv.rda; the code below expects this to
# provide the `nnv` data frame.
load("data/nnv.rda")

Let’s just do the MA gubernatorial election in 1800, since 1800 was an interesting year for electoral politics.

# Massachusetts gubernatorial returns for 1800. Conditions passed to a single
# filter() call are combined with AND.
ma <- filter(
  nnv,
  state == "Massachusetts",
  office == "Governor",
  date == "1800"
)

We need to know some basic information about the election.

Candidates and affiliations? The only parties are Federalists and Republicans (some candidates have no recorded affiliation), but there were 41 candidates.

unique(ma$name)
##  [1] "Caleb Strong"        "Elbridge Gerry"      "Moses Gill"         
##  [4] "Theodore Sedgwick"   "Fisher Ames"         "Edward H. Robbins"  
##  [7] "James Sullivan"      "Samuel Phillips"     "William Heath"      
## [10] "Francis Dana"        "John Blunt"          "Ebenezer Cox"       
## [13] "William Cushing"     "Andrew Dana"         "Henry Dearborn"     
## [16] "Samuel Dole"         "Oliver Keating"      "Lucas King"         
## [19] "Ebenezer Mattson"    "Simeon Strong"       "William Bickford"   
## [22] "Daniel Bixby"        "Daniel Bonney"       "Theophilus Bradbury"
## [25] "Micael Dodge"        "Robert Dodge"        "Enoch Dole"         
## [28] "John Dole"           "Daniel Dudley"       "Aaron Forbes"       
## [31] "James Gardner"       "Lewis Holden"        "Daniel Johnson"     
## [34] "Peter Kear"          "Nehemiah Knolton"    "Robert MacDaniel"   
## [37] "Theophilus Parsons"  "Walter Ware"         "William Cly West"   
## [40] "Joshua Whiting"      "John Woodman"
length(unique(ma$name))
## [1] 41
unique(ma$affiliation)
## [1] "Federalist" "Republican" NA

How many iterations? It appears there was only one ballot, which makes this map a bit easier.

unique(ma$iteration)
## [1] "First Ballot"

How were the votes tallied? There is data for counties, districts, and towns. This is potentially very tricky if the data includes multiple counts of the same votes, e.g., if the county enumeration includes all the towns. Let’s look at just Caleb Strong (who won the election, by the way) to see what we have.

# Caleb Strong's rows only, reduced to the geographic columns and the vote
# count.
cs <- select(
  filter(ma, name == "Caleb Strong"),
  state, county, district, town, populated.place, vote
)

Let’s take records where the town and district and populated place records are blank (i.e., county records) to see what we have.

# County-level rows: a county is recorded, but no town, district, or
# populated place.
cs %>%
  filter(
    is.na(town),
    is.na(district),
    is.na(populated.place),
    !is.na(county)
  ) %>%
  select(county, district, town, populated.place, vote)
## Source: local data frame [18 x 5]
## 
##        county district town populated.place vote
## 1  Barnstable       NA   NA              NA  309
## 2   Berkshire       NA   NA              NA 1232
## 3     Bristol       NA   NA              NA  785
## 4  Cumberland       NA   NA              NA 1231
## 5       Dukes       NA   NA              NA  121
## 6       Essex       NA   NA              NA 2617
## 7   Hampshire       NA   NA              NA 4274
## 8     Hancock       NA   NA              NA  394
## 9   Kennebeck       NA   NA              NA  545
## 10    Lincoln       NA   NA              NA  809
## 11  Middlesex       NA   NA              NA 1242
## 12  Nantucket       NA   NA              NA   29
## 13    Norfolk       NA   NA              NA  753
## 14   Plymouth       NA   NA              NA  732
## 15    Suffolk       NA   NA              NA 1646
## 16 Washington       NA   NA              NA  251
## 17  Worcester       NA   NA              NA 2126
## 18       York       NA   NA              NA  768

Now if we sum up just the town, district, and populated-place rows within each county, we should get the same county totals.

# Per-county totals built only from the sub-county rows (those with a town,
# district, or populated place), for comparison with the county-level rows.
cs %>%
  filter(!(is.na(town) & is.na(district) & is.na(populated.place))) %>%
  group_by(county) %>%
  summarise(sum_town_votes = sum(vote))
## Source: local data frame [18 x 2]
## 
##        county sum_town_votes
## 1  Barnstable            309
## 2   Berkshire           1232
## 3     Bristol            785
## 4  Cumberland           1231
## 5       Dukes            121
## 6       Essex           2617
## 7   Hampshire           4274
## 8     Hancock            394
## 9   Kennebeck            545
## 10    Lincoln            809
## 11  Middlesex           1242
## 12  Nantucket             29
## 13    Norfolk            756
## 14   Plymouth            732
## 15    Suffolk           1646
## 16 Washington            251
## 17  Worcester           2126
## 18       York            768

Sure enough, they are identical. (With the curious exception of Norfolk, which differs by three votes; was an error made?) Next, we can check whether there is a statewide total row (one with no county, town, district, or populated place) and compare it with the sum of all the town and district votes.

# Statewide row: every geographic column below the state level is missing.
filter(
  cs,
  is.na(county),
  is.na(town),
  is.na(district),
  is.na(populated.place)
)
## Source: local data frame [1 x 6]
## 
##           state county district town populated.place  vote
## 1 Massachusetts     NA       NA   NA              NA 19864
# Grand total computed from the sub-county rows only.
cs %>%
  filter(!(is.na(town) & is.na(district) & is.na(populated.place))) %>%
  summarise(total_votes = sum(vote))
## Source: local data frame [1 x 1]
## 
##   total_votes
## 1       19867

They are off, but only by the same three votes we noticed earlier. Therefore we can conclude that each vote that was cast in MA is represented in this data three times (oy vey!): once at the level of enumeration at the town or equivalent (district or populated place), again at the level of the county, and again at the level of the state. That leaves open the question of whether the compilers did the tally, or whether these rows in the file represent reports in the newspapers. But at least we know what’s happening for this election, and can presume that a similar procedure was followed for the other 18385 elections in the data set.

What we need to do is to filter out the duplicate vote counting, then tidy the data so we have a single column “town” but keep the kind of town that it is.

Filter out the duplicates:

# Keep only the sub-county rows (a town, district, or populated place is
# present), which discards the county- and state-level tallies.
# NOTE(review): the columns are selected by position; confirm the indices
# against names(nnv) before reusing this on a different extract.
ma <- ma %>%
  filter(!(is.na(town) & is.na(district) & is.na(populated.place))) %>%
  select(12:14, 18, 21:25)

Now tidy the town, district, and populated places into a single column.

# Collapse columns 2 to 4 into a key/value pair: `type_town` records which
# column a value came from and `town` holds the value itself. Rows whose value
# is NA are dropped.
ma <- gather(ma, type_town, town, 2:4, na.rm = TRUE)

Now we should be able to calculate who won:

# Total votes per candidate, largest first, rendered as a table.
ma %>%
  group_by(name) %>%
  summarise(votes = sum(vote)) %>%
  arrange(desc(votes)) %>%
  kable()
name votes
Caleb Strong 19867
Elbridge Gerry 17187
Moses Gill 2018
Theodore Sedgwick 65
Fisher Ames 63
Edward H. Robbins 58
James Sullivan 54
Samuel Phillips 35
William Heath 24
Francis Dana 6
John Blunt 4
Andrew Dana 2
Ebenezer Cox 2
Ebenezer Mattson 2
Henry Dearborn 2
Lucas King 2
Oliver Keating 2
Samuel Dole 2
Simeon Strong 2
William Cushing 2
Aaron Forbes 1
Daniel Bixby 1
Daniel Bonney 1
Daniel Dudley 1
Daniel Johnson 1
Enoch Dole 1
James Gardner 1
John Dole 1
John Woodman 1
Joshua Whiting 1
Lewis Holden 1
Micael Dodge 1
Nehemiah Knolton 1
Peter Kear 1
Robert Dodge 1
Robert MacDaniel 1
Theophilus Bradbury 1
Theophilus Parsons 1
Walter Ware 1
William Bickford 1
William Cly West 1

So Caleb Strong (Federalist) defeated Elbridge Gerry (Republican) despite the only serious third contender Moses Gill (Federalist) getting over two thousand votes.

Making a county map should be easiest since we already have the county files in R in my USAboundaries package. Let’s get the MA counties in 1800, then find a list of the county names.

# Massachusetts county boundaries as of mid-1800, then their names in
# alphabetical order.
counties <- us_boundaries(
  as.Date("1800-06-01"),
  type = "county",
  states = "Massachusetts"
)
sort(counties@data$name)
##  [1] "BARNSTABLE" "BERKSHIRE"  "BRISTOL"    "CUMBERLAND" "DUKES"     
##  [6] "ESSEX"      "HAMPSHIRE"  "HANCOCK"    "KENNEBEC"   "LINCOLN"   
## [11] "MIDDLESEX"  "NANTUCKET"  "NORFOLK"    "PLYMOUTH"   "SUFFOLK"   
## [16] "WASHINGTON" "WORCESTER"  "YORK"

And let’s find the names of the counties in our election data:

sort(unique(ma$county))
##  [1] "Barnstable" "Berkshire"  "Bristol"    "Cumberland" "Dukes"     
##  [6] "Essex"      "Hampshire"  "Hancock"    "Kennebeck"  "Lincoln"   
## [11] "Middlesex"  "Nantucket"  "Norfolk"    "Plymouth"   "Suffolk"   
## [16] "Washington" "Worcester"  "York"

Notice that Kennebec/Kennebeck are spelled differently. Fix that:

# Align the election data's spelling with the boundary data's "KENNEBEC".
ma$county <- replace(ma$county, which(ma$county == "Kennebeck"), "Kennebec")

Calculate breaks:

# Nine Jenks classes over the per-candidate, per-county vote totals. The same
# breaks are reused for every candidate's map.
breaks <- ma %>%
  group_by(name, county) %>%
  summarise(votes = sum(vote)) %$%
  classIntervals(var = votes, n = 9, style = "jenks")

Now we can start mapping each candidate:

# The same 1800 county boundaries, requested as a data frame for plotting.
counties_df <- us_boundaries(
  as.Date("1800-06-01"),
  type = "county",
  states = "Massachusetts",
  format = "df"
)
## Loading required package: rgeos
## rgeos version: 0.3-8, (SVN revision 460)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Polygon checking: TRUE
# Lower-case the county names so they can be matched case-insensitively.
counties_df$name <- tolower(counties_df$name)
# Bare-bones ggplot2 theme for maps: starts from theme_bw() and blanks out
# rectangles, lines, axis text and axis titles.
# Adapted from http://docs.ggplot2.org/dev/vignettes/themes.html
#
# base_size, base_family: passed straight through to theme_bw().
theme_nothing <- function(base_size = 12, base_family = "Helvetica") {
  require(grid) # attaches grid so that unit() below resolves
  overrides <- theme(
    rect = element_blank(),
    line = element_blank(),
    # text = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks.margin = unit(0, "lines")
  )
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    overrides
}
# Draw a county choropleth of the votes received by one candidate.
#
# Reads three objects from the enclosing environment: `ma` (the election
# returns), `breaks` (the shared class intervals) and `counties_df` (the county
# polygons as a data frame).
#
# candidate: candidate name, matched exactly against `ma$name`.
# Returns a ggplot object.
make_map <- function(candidate) {
  county_map <- ma %>%
    filter(name == candidate) %>%
    mutate(county = tolower(county)) %>%
    group_by(county) %>%
    summarize(votes = sum(vote)) %>%
    # include.lowest = TRUE closes the first interval on the left. Without it a
    # total equal to the smallest break falls outside every class and is
    # silently turned into NA.
    mutate(Votes = cut(votes, breaks$brks, include.lowest = TRUE)) %>%
    # right_join keeps every county polygon, so counties with no votes for
    # this candidate are still drawn (with a missing fill).
    right_join(counties_df, by = c("county" = "name"))

  # NOTE(review): classes that do not occur for this candidate are presumably
  # dropped from the fill scale, so colours may not be comparable between
  # candidates' maps; consider scale_fill_brewer(..., drop = FALSE).
  ggplot(county_map,
         aes(x = long, y = lat, group = group, fill = Votes, map_id = id)) +
    geom_map(map = county_map, color = "darkgray") +
    coord_map() +
    ggtitle(paste("Election for MA Governor, 1800:", candidate)) +
    scale_fill_brewer(palette = "YlGnBu") +
    theme_nothing()
}

Now let’s make the maps:

# One map per distinct candidate; returns the list of plots.
lapply(unique(ma$name), make_map)
## Loading required package: grid
## [[1]]

## 
## [[2]]

## 
## [[3]]