## Loading required package: knitr

1. Getting election results data

This document follows the tutorial at Computerworld. I will be using the tmap, tmaptools, sf, rio, and leaflet libraries.

getwd()
## [1] "C:/Users/murrayl/OneDrive - KCIC/ETC/MC/DATA110/UNIT 5"
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tmap)
## Warning: package 'tmap' was built under R version 3.6.1
library(tmaptools)
## Warning: package 'tmaptools' was built under R version 3.6.1
library(sf)
## Warning: package 'sf' was built under R version 3.6.1
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.1
library(raster)
## Warning: package 'raster' was built under R version 3.6.1
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
library(rio)
## Warning: package 'rio' was built under R version 3.6.1
library(RColorBrewer)
install_formats("rio")
## [1] TRUE

To make the data easier to call, I will rename it and then import it. I am also only interested in candidates Hillary Clinton and Bernie Sanders, thus I will select the columns with only these candidates.

# Locate the New Hampshire results workbook. The bare relative path failed with
# "No such file" when run from the project folder, so also look in the "GIS"
# subfolder that the other data files in this document are read from.
# NOTE(review): presumably NHD2016.xlsx sits in GIS/ next to the shapefile - confirm.
nhdatafile <- "NHD2016.xlsx"
if (!file.exists(nhdatafile) && file.exists(file.path("GIS", nhdatafile))) {
  nhdatafile <- file.path("GIS", nhdatafile)
}
# Stop here with a clear message rather than letting every later step fail
# with "object 'nhdata' not found".
if (!file.exists(nhdatafile)) {
  stop(
    "Cannot find '", nhdatafile, "' in '", getwd(), "' or its GIS subfolder.",
    call. = FALSE
  )
}
nhdata <- rio::import(nhdatafile)
# Keep only the county name and the two candidates being compared.
nhdata <- nhdata[, c("County", "Clinton", "Sanders")]

2. Decide what data to map

We decided to add columns for candidates’ margins of victory (or loss) and percent of the vote, again for now pretending there were votes cast only for the two main candidates.

# Add columns for percents and margins.
# Shares are fractions of the two-candidate total (votes for anyone else are
# ignored), so SandersPct + ClintonPct is 1 for every county.
nhdata$SandersMarginVotes <- nhdata$Sanders - nhdata$Clinton
## Error in eval(expr, envir, enclos): object 'nhdata' not found
# Share = candidate's votes / two-candidate total (previously the vote margin
# was divided by the total, which is not a vote share and goes negative).
nhdata$SandersPct <- nhdata$Sanders / (nhdata$Sanders + nhdata$Clinton) # Will use formatting later to multiply by a hundred
## Error in eval(expr, envir, enclos): object 'nhdata' not found
nhdata$ClintonPct <- nhdata$Clinton / (nhdata$Sanders + nhdata$Clinton)
## Error in eval(expr, envir, enclos): object 'nhdata' not found
# Sanders' lead over Clinton as a difference of the two shares (a fraction).
nhdata$SandersMarginPctgPoints <- nhdata$SandersPct - nhdata$ClintonPct
## Error in eval(expr, envir, enclos): object 'nhdata' not found

3. Get your geographic data

For this New Hampshire mapping project by county, we downloaded files from the Census Bureau’s Cartographic Boundary shapefiles page. We are interested in the file with the .shp extension. To make it easier to call, we’ll store the name of this file in a variable called usshapefile and then import it using tmaptools’ read_shape() function:

# Build the shapefile path explicitly instead of calling setwd(), so reading
# the file does not change the session's working directory.
gisdir <- "C:/Users/murrayl/OneDrive - KCIC/ETC/MC/DATA110/UNIT 5/GIS"
usshapefile <- file.path(gisdir, "cb_2014_us_county_5m/cb_2014_us_county_5m.shp")
# tmaptools::read_shape() is deprecated (it warned that it has moved to
# github.com/mtennekes/oldtmaptools), so read the file with sf directly.
# st_zm() drops any Z coordinate, matching the "Z-dimension discarded"
# behaviour of the old reader.
usgeo <- sf::st_zm(sf::st_read(usshapefile, quiet = TRUE))

Now we can run tmap’s quick thematic command, qtm() to ensure that we get a map of the U.S. with divisions.

# Quick thematic map of the full U.S. county layer, as a check on the import.
tmap::qtm(shp = usgeo)

We can also check the str() of the usgeo data to observe that it looks like a normal data frame, but includes the final geometry column with sfc_MULTIPOLYGON information.

# Inspect the column types; the last column holds the polygon geometry.
utils::str(object = usgeo)
## Classes 'sf' and 'data.frame':   3233 obs. of  10 variables:
##  $ STATEFP : Factor w/ 56 levels "01","02","04",..: 1 11 16 37 39 37 28 26 29 10 ...
##  $ COUNTYFP: Factor w/ 328 levels "001","003","005",..: 42 76 74 78 78 38 22 137 291 35 ...
##  $ COUNTYNS: Factor w/ 3233 levels "00023901","00025441",..: 120 430 738 1911 2024 1880 1399 1373 1490 298 ...
##  $ AFFGEOID: Factor w/ 3233 levels "0500000US01001",..: 30 442 844 2189 2302 2158 1669 1589 1764 344 ...
##  $ GEOID   : Factor w/ 3233 levels "01001","01003",..: 30 442 844 2189 2302 2158 1669 1589 1764 344 ...
##  $ NAME    : Factor w/ 1921 levels "Abbeville","Acadia",..: 620 592 945 1291 1665 692 320 1683 284 747 ...
##  $ LSAD    : Factor w/ 11 levels "00","03","04",..: 5 5 5 5 5 5 5 5 1 5 ...
##  $ ALAND   : Factor w/ 3233 levels "1000508842","1001064387",..: 1199 5 2047 452 1721 2091 1880 1194 2397 1215 ...
##  $ AWATER  : Factor w/ 3233 levels "0","10017640",..: 1626 414 1940 1718 1118 2724 2916 2228 1613 497 ...
##  $ geometry:sfc_MULTIPOLYGON of length 3233; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:9, 1:2] -88.2 -88.2 -88.2 -88.1 -87.5 ...
##   ..- attr(*, "class")= chr  "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr  "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...

Now we want to extract New Hampshire from the geodata; we just need the state FIPS code for New Hampshire, which turns out to be 33.

# Keep only the counties whose state FIPS code is "33" (New Hampshire).
nhgeo <- dplyr::filter(usgeo, STATEFP == "33")

Now let’s check to see if the map looks right:

# Quick plot of the New Hampshire counties only.
tmap::qtm(shp = nhgeo)

4. Merge spatial and results data

In order to merge the data, we need a column shared by each data set and records that refer to the entity in exactly the same way. We can check to see if the county names in both data sets have the same structure.

# Structure of the county-name column in the geospatial data (nhgeo)
str(nhgeo[["NAME"]])
##  Factor w/ 1921 levels "Abbeville","Acadia",..: 684 791 416 138 1470 334 1653 1131 282 1657
# Structure of the county-name column in the election results (nhdata)
str(nhdata[["County"]])
## Error in str(nhdata$County): object 'nhdata' not found

Unfortunately, the geospatial file lists counties as R factors, while they’re plain character text in the data. Let’s change the factors to character strings. Let’s also sort both datasets by county name, so they are easier to compare.

# Store the shapefile's county names as character strings, not a factor
nhgeo[["NAME"]] <- as.character(nhgeo[["NAME"]])

# Put both tables in alphabetical county order
nhgeo <- nhgeo[order(nhgeo[["NAME"]]), ]
nhdata <- nhdata[order(nhdata[["County"]]), ]
## Error in eval(expr, envir, enclos): object 'nhdata' not found

Instead of comparing them by eye, we can check whether the two columns are identical with the identical() function.

# TRUE only if both county-name vectors match exactly, element by element.
identical(x = nhgeo[["NAME"]], y = nhdata[["County"]])
## Error in identical(nhgeo$NAME, nhdata$County): object 'nhdata' not found

Great! Now we can join the two datasets. The tmaptools append_data() function is a convenient way to do this, because of its intuitive syntax and because it allows the two join columns to have different names.

# Join the election results onto the county shapes by county name.
# tmaptools::append_data() is deprecated (it warned that it has moved to
# github.com/mtennekes/oldtmaptools), so use dplyr::left_join(); with sf loaded
# the result is still an sf object. The shape key column NAME is kept as the
# county column in the result.
nhmap <- dplyr::left_join(nhgeo, nhdata, by = c("NAME" = "County"))

Let’s check the new structure:

# Inspect the merged object: shape columns plus the election columns.
utils::str(object = nhmap)
## Error in str(nhmap): object 'nhmap' not found

5. Create a static map

Now, let’s create a simple static map of Sanders’ margins by county in number of votes and by percentage:

# Sanders' margin in raw votes
qtm(shp = nhmap, fill = "SandersMarginVotes")
## Error in qtm(nhmap, "SandersMarginVotes"): object 'nhmap' not found
# Sanders' margin as a difference in vote share
qtm(shp = nhmap, fill = "SandersMarginPctgPoints")
## Error in qtm(nhmap, "SandersMarginPctgPoints"): object 'nhmap' not found

For more control over the map’s colors, borders and such, use the tm_shape() function, which uses a ggplot2-like syntax to set fill, border and other attributes. The first line sets the geodata file to be mapped, while tm_fill() sets the data column to use for mapping color values. The “PRGn” palette argument is a ColorBrewer palette of purples and greens.

# Choropleth of Sanders' vote margin with semi-transparent county borders
# and county-name labels.
tm_shape(shp = nhmap) +
  tm_fill(
    col = "SandersMarginVotes",
    title = "Sanders Margin, Total Votes",
    palette = "PRGn"
  ) +
  tm_borders(alpha = 0.5) +
  tm_text(text = "NAME", size = 0.8)
## Error in as.list.environment(environment()): object 'nhmap' not found

There are also a few built-in tmap themes, such as tm_style_classic.

# Same choropleth as before, drawn with tmap's built-in "classic" style.
# tm_style("classic") is the current spelling of the deprecated
# tm_style_classic() shortcut.
tm_shape(nhmap) +
  tm_fill("SandersMarginVotes", title = "Sanders Margin, Total Votes", palette = "PRGn") +
  tm_borders(alpha = 0.5) +
  tm_text("NAME", size = 0.8) +
  tm_style("classic")
## Error in as.list.environment(environment()): object 'nhmap' not found

Static maps can be saved using the tmap_save() function.

# Keep the map in a variable so it can be written to disk.
nhstaticmap <- tm_shape(shp = nhmap) +
  tm_fill(
    col = "SandersMarginVotes",
    title = "Sanders Margin, Total Votes",
    palette = "PRGn"
  ) +
  tm_borders(alpha = 0.5) +
  tm_text(text = "NAME", size = 0.8)
## Error in as.list.environment(environment()): object 'nhmap' not found
# Write the static map to an image file in the working directory.
tmap_save(tm = nhstaticmap, filename = "nhdemprimary.jpg")
## Error in tmap_save(nhstaticmap, filename = "nhdemprimary.jpg"): object 'nhstaticmap' not found

6. Create palette and pop-ups for interactive map

Utilizing RStudio’s Leaflet package, the next map will let users click to see underlying data as well as switch between maps. For a Leaflet map, there are two extra things we’ll want to create in addition to the data we already have: a color palette and pop-up window contents. For palette, we specify the data range we’re mapping and what kind of color palette we want — both the particular colors and the type of color scale. There are four built-in types: colorNumeric, colorBin, colorQuantile, and colorFactor.

Let’s map where Hillary Clinton was strongest, the inverse of the Sanders maps. To map Clinton’s vote percentage, we could use the “Blues” palette with the colorNumeric function.

# Continuous "Blues" scale spanning the range of Clinton's vote share.
clintonPalette <- leaflet::colorNumeric("Blues", domain = nhmap[["ClintonPct"]])
## Error in colorNumeric(palette = "Blues", domain = nhmap$ClintonPct): object 'nhmap' not found

We’ll also want to add a pop-up window to create an interactive map, enabling users to click and see underlying data. For the pop-up window text display, we’ll want to turn the decimal numbers for votes such as 0.7865 into percentages like 78.7%. We could do it by writing a short formula, but the scales package has a percent() function to make this easier.

library("scales")
## Warning: package 'scales' was built under R version 3.6.1
# Pop-up HTML, one string per county. The <br/> puts the vote shares on their
# own line; without it the county name ran straight into "Sanders".
nhpopup <- paste0(
  "County: ", nhmap$NAME, "<br/>",
  "Sanders ", percent(nhmap$SandersPct), " - Clinton ", percent(nhmap$ClintonPct)
)
## Error in paste0("County: ", nhmap$NAME, "Sanders ", percent(nhmap$SandersPct), : object 'nhmap' not found

Now, let’s create a pop-up window. We are going to rename the county name column NAME to “County” so that it is more intuitive to view. paste0() is a concatenation function that joins text and variable values into a single character string.

# Expose the county-name column under the friendlier name "County".
nhmap <- dplyr::rename(nhmap, County = NAME)
## Error in dplyr::rename(nhmap, County = NAME): object 'nhmap' not found
# Pop-up HTML, one string per county. The <br/> puts the vote shares on their
# own line; without it the county name ran straight into "Sanders".
nhpopup <- paste0(
  "County: ", nhmap$County, "<br/>",
  "Sanders ", percent(nhmap$SandersPct), " - Clinton ", percent(nhmap$ClintonPct)
)
## Error in paste0("County: ", nhmap$County, "Sanders ", percent(nhmap$SandersPct), : object 'nhmap' not found

7. Generate an interactive map

Let’s go over the code. leaflet(nhmap) creates a leaflet map object and sets nhmap as the data source. addProviderTiles(“CartoDB.Positron” ) sets the background map tiles to CartoDB’s attractive Positron design. There’s a list of free background tiles and what they look like on GitHub if you’d like to choose something else.

The addPolygons() function does the rest — putting the county shapes on the map and coloring them accordingly. stroke=FALSE says no border around the counties, fillOpacity sets the opacity of the colors, popup sets the contents of the popup window and color sets the palette — I’m not sure why the tilde is needed before the palette name, but that’s the function format — and what data should be mapped to the color.

# Interactive map of Clinton's vote share: no county outlines, click for pop-up.
leaflet(data = nhmap) %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addPolygons(
    stroke = FALSE,
    smoothFactor = 0.2,
    fillOpacity = 0.8,
    popup = nhpopup,
    # the formula is evaluated against the map data, so the column can be
    # named directly
    color = ~clintonPalette(ClintonPct)
  )
## Error in structure(list(options = options), leafletData = data): object 'nhmap' not found
# re-project
# nhmap_projected was used below without ever being created. Build it with the
# same longitude/latitude WGS84 transform this document applies to scmap.
nhmap_projected <- sf::st_transform(nhmap, "+proj=longlat +datum=WGS84")
leaflet(nhmap_projected) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    stroke = FALSE,
    smoothFactor = 0.2,
    fillOpacity = .8,
    popup = nhpopup,
    # take the colors from the re-projected data itself so they stay aligned
    # with its rows
    color = ~clintonPalette(ClintonPct)
  )

8. Add palettes for a multi-layer map

Let’s look at the GOP results in South Carolina among the top three candidates. We are using data from the South Carolina State Election Commission as well as Census Bureau data for education levels by county. First, let’s create the South Carolina shapefile.

# South Carolina data
# Build the file path explicitly instead of calling setwd(), so reading the
# file does not change the session's working directory.
gisdir <- "C:/Users/murrayl/OneDrive - KCIC/ETC/MC/DATA110/UNIT 5/GIS"
scdatafile <- file.path(gisdir, "SCGOP2016.csv")
scfipscode <- "45"
scdata <- rio::import(scdatafile)

# South Carolina shapefile: counties whose state FIPS code matches scfipscode
scgeo <- dplyr::filter(usgeo, STATEFP == scfipscode)

Now let’s look at the quick plot of the scgeo plot.

# Sanity-check plot of the South Carolina county shapes.
library("tmap")
library("tmaptools")
tmap::qtm(shp = scgeo)

Now let’s add a column with the percentage of votes for each candidate, and the winner in each precinct.

# Candidates' vote counts are in columns 2-7 of the results table.
candidates <- colnames(scdata)[2:7]

# Add a "<candidate>Pct" column with each candidate's share of the row total.
# Columns are assigned by name, all at once, instead of at the hard-coded
# positions i + 7, which were only right when scdata had exactly 8 columns.
pct_cols <- paste0(candidates, "Pct")
scdata[pct_cols] <- lapply(
  scdata[candidates],
  function(votes) votes / scdata$Total
)

# Get winner in each precinct: the name of the candidate column with the most
# votes in that row (the first such column on a tie). seq_len() keeps this
# safe for a zero-row table, and vapply() guarantees a character result.
scdata$winner <- vapply(
  seq_len(nrow(scdata)),
  function(i) candidates[which.max(unlist(scdata[i, candidates]))],
  character(1)
)

Now we need to import a spreadsheet with percent of adult population holding at least a 4-yr college degree and then check if county names are in the same format in both files.

# Import spreadsheet with percent of adult population holding at least a 4-yr college degree
# Build the file path explicitly instead of calling setwd(), so reading the
# file does not change the session's working directory.
gisdir <- "C:/Users/murrayl/OneDrive - KCIC/ETC/MC/DATA110/UNIT 5/GIS"
sced <- rio::import(file.path(gisdir, "SCdegree.xlsx"))

# Check if county names are in the same format in both files
str(scgeo$NAME)
##  Factor w/ 1921 levels "Abbeville","Acadia",..: 554 995 810 35 1073 523 1662 359 100 331 ...
str(scdata$County)
##  chr [1:46] "Abbeville" "Aiken" "Allendale" "Anderson" "Bamberg" ...

They are not, so let’s change the county names to plain characters, order them by county name, and check to see that they are identical.

# Store the shapefile's county names as character strings, not a factor
scgeo[["NAME"]] <- as.character(scgeo[["NAME"]])

# Sort both tables alphabetically by county
scgeo <- scgeo[order(scgeo[["NAME"]]), ]
scdata <- scdata[order(scdata[["County"]]), ]

# The two county columns should now match exactly
identical(x = scgeo[["NAME"]], y = scdata[["County"]])
## [1] TRUE

Now, let’s add the election results and rename the column.

# Add the election results and rename county column.
# tmaptools::append_data() is deprecated (it warned that it has moved to
# github.com/mtennekes/oldtmaptools), so use dplyr::left_join(); with sf loaded
# the result is still an sf object.
scmap <- dplyr::left_join(scgeo, scdata, by = c("NAME" = "County"))
# Qualified with dplyr:: to match the other rename() call in this document.
scmap <- dplyr::rename(scmap, County = NAME)
# Add the college-degree data; both tables call the key column "County".
scmap <- dplyr::left_join(scmap, sced, by = "County")

In making my color palettes, I decided to use the same numerical scale for all three candidates. If I scaled color intensity for each candidate’s minimum and maximum, a candidate with 10% to 18% would have a map with the same color intensities as one who had 45% to 52% — giving a wrong impression of the losing candidate’s strength. So, first I calculated the minimum and maximum for the combined Trump/Rubio/Cruz county results:

# Smallest and largest county vote share across Trump, Rubio and Cruz,
# used as one shared color-scale range.
minpct <- min(
  scmap[["Donald.J.TrumpPct"]],
  scmap[["Marco.RubioPct"]],
  scmap[["Ted.CruzPct"]]
)
maxpct <- max(
  scmap[["Donald.J.TrumpPct"]],
  scmap[["Marco.RubioPct"]],
  scmap[["Ted.CruzPct"]]
)

Now I can create a palette for each candidate using different colors but the same intensity range.

# One color ramp per candidate, all spanning the same minpct-maxpct range so
# color intensity is comparable between candidates.
trumpPalette <- colorNumeric("Purples", domain = c(minpct, maxpct))
rubioPalette <- colorNumeric("Reds", domain = c(minpct, maxpct))
cruzPalette <- colorNumeric("Oranges", domain = c(minpct, maxpct))

I’ll also add palettes for the winner and education layers:

# Two-color categorical palette over the values found in the winner column.
winnerPalette <- colorFactor(c("#984ea3", "#e41a1c"), domain = scmap[["winner"]])
# Continuous "Blues" scale over the college-degree percentage.
edPalette <- colorNumeric("Blues", domain = scmap[["PctCollegeDegree"]])

Finally, I’ll create a basic pop-up showing the county name, who won, the percentage for each candidate and percent of population with a college degree:

# Pop-up HTML, one string per county. Each field ends with <br/> so it gets
# its own line; without separators the values ran into the next label
# (e.g. "<county>Winner: ").
scpopup <- paste0(
  "County: ", scmap$County, "<br/>",
  "Winner: ", scmap$winner, "<br/>",
  "Trump: ", percent(scmap$Donald.J.TrumpPct), "<br/>",
  "Rubio: ", percent(scmap$Marco.RubioPct), "<br/>",
  "Cruz: ", percent(scmap$Ted.CruzPct), "<br/>",
  "Pct w college ed: ", scmap$PctCollegeDegree, "% vs state-wide avg of 25%"
)

Finally, before mapping, I know that I’m going to need to add the same projection that I needed for the New Hampshire map. This code will add that projection to the scmap object:

# Re-project the South Carolina map to longitude/latitude on the WGS84 datum.
scmap <- sf::st_transform(x = scmap, crs = "+proj=longlat +datum=WGS84")

This code shows a basic map of winners by county. Note that because only Trump and Rubio won counties in South Carolina, we can set up the legend to show only their colors and names:

# Single-layer map: each county is colored by its winner, with a fixed
# two-entry legend.
leaflet(data = scmap) %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addPolygons(
    stroke = TRUE,
    weight = 1,
    smoothFactor = 0.2,
    fillOpacity = 0.75,
    popup = scpopup,
    # the formula is evaluated against the map data, so the column can be
    # named directly
    color = ~winnerPalette(winner),
    group = "Winners"
  ) %>%
  addLegend(
    position = "bottomleft",
    colors = c("#984ea3", "#e41a1c"),
    labels = c("Trump", "Rubio")
  )

9. Add map layers and controls

A multi-layer map with layer controls starts off the same as our previous map, with one addition: A group name. In this case, each layer will be its own group, but it’s also possible to turn multiple layers on and off together.

The next step is to add additional polygon layers for each candidate and a final layer for college education, along with a layer control to wrap up the code. This time, we’ll store the map in a variable and then display it:

# Adds one polygon layer to `map`. Every layer shares the same outline,
# opacity and pop-up; only the per-county colors and the group name differ.
add_sc_layer <- function(map, colors, group) {
  addPolygons(
    map,
    stroke = TRUE,
    weight = 1,
    smoothFactor = 0.2,
    fillOpacity = 0.75,
    popup = scpopup,
    color = colors,
    group = group
  )
}

# Multi-layer map: winners, one layer per top-three candidate, and education.
# The layers are registered as base groups, so the control shows one at a time.
scGOPmap <- leaflet(scmap) %>%
  addProviderTiles("CartoDB.Positron") %>%
  add_sc_layer(winnerPalette(scmap$winner), "Winners") %>%
  addLegend(
    position = "bottomleft",
    colors = c("#984ea3", "#e41a1c"),
    labels = c("Trump", "Rubio")
  ) %>%
  add_sc_layer(trumpPalette(scmap$Donald.J.TrumpPct), "Trump") %>%
  add_sc_layer(rubioPalette(scmap$Marco.RubioPct), "Rubio") %>%
  add_sc_layer(cruzPalette(scmap$Ted.CruzPct), "Cruz") %>%
  add_sc_layer(edPalette(scmap$PctCollegeDegree), "College degs") %>%
  addLayersControl(
    baseGroups = c("Winners", "Trump", "Rubio", "Cruz", "College degs"),
    position = "bottomleft",
    options = layersControlOptions(collapsed = FALSE)
  )
# Print the widget so it renders.
scGOPmap

10. Save your interactive map

If you’re familiar with RMarkdown or Shiny, a Leaflet map can be embedded in an RMarkdown document or Shiny web application. If you’d like to use this map as an HTML page on a website or elsewhere, save a Leaflet map with the htmlwidget package’s saveWidget() function:

# install.packages("htmlwidgets")
library("htmlwidgets")
## Warning: package 'htmlwidgets' was built under R version 3.6.1
# Write the interactive map to an HTML file in the working directory.
htmlwidgets::saveWidget(widget = scGOPmap, file = "scGOPwidget2.html")

You can also save the map with external resources such as jQuery and the Leaflet JavaScript code in a separate directory by using the selfcontained=FALSE argument and choosing the subdirectory for the dependency files:

# install.packages("htmlwidgets")
# Write the map as HTML with its JavaScript/CSS dependencies kept in a
# separate "js" directory instead of embedded in the page.
htmlwidgets::saveWidget(
  widget = scGOPmap,
  file = "scGOPprimary_withdependencies.html",
  selfcontained = FALSE,
  libdir = "js"
)