This tutorial is part of Computerworld’s How to Make a Map with R in 10 (fairly) Easy Steps
by Sharon Machlis sharon_machlis@idg.com
Set various values needed, including names of files and FIPS codes for New Hampshire and South Carolina
nhdatafile <- "data/NHD2016.xlsx"
nhdatafilecsv <- "data/NHD2016.csv"
usshapefile <- "data/cb_2014_us_county_5m/cb_2014_us_county_5m.shp"
nhfipscode <- "33"
scdatafile <- "data/SCGOP2016.csv"
scfipscode <- "45"
Run any of the install.packages() commands below for packages that are not yet on your system
install.packages(“shiny”)
install.packages(“urltools”)
install.packages(“tmap”)
install.packages(“tmaptools”)
install.packages(“leaflet”)
install.packages(“leaflet.extras”)
install.packages(“rio”)
install.packages(“scales”)
install.packages(“htmlwidgets”)
install.packages(“sf”)
install.packages(“dplyr”)
##### ```
Load the tmap, tmaptools, and leaflet packages into your working session:
library(tidyverse) # my additions
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#
library(scales) # referenced below
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# following per original script
library("tmap")
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.5.1-CAPI-1.9.1
## and GEOS at installation 3.5.0-CAPI-1.9.0differ
library("tmaptools")
library("leaflet")
library("sf")
## Linking to GEOS 3.5.1, GDAL 2.2.2, PROJ 4.9.2
library("leaflet.extras")
library("dplyr")
( top )
Start with the 2016 New Hampshire Democratic primary results, which are available from the New Hampshire secretary of state’s office as a downloadable Excel spreadsheet.
nhdata <- rio::import(nhdatafile)
If you have any problems with this, there is also a CSV version of the file – sometimes reading Excel between Mac and Windows can be tricky. Try nhdata <- rio::import(nhdatafilecsv)
Eliminate columns for minor candidates and just use County, Clinton and Sanders columns:
nhdata <- nhdata[,c("County", "Clinton", "Sanders")]
( top )
Add columns for percents and margins:
nhdata$SandersMarginVotes <- nhdata$Sanders - nhdata$Clinton
nhdata$SandersPct <- (nhdata$Sanders) / (nhdata$Sanders + nhdata$Clinton) # Will use formatting later to multiply by a hundred
nhdata$ClintonPct <- (nhdata$Clinton) / (nhdata$Sanders + nhdata$Clinton)
nhdata$SandersMarginPctgPoints <- nhdata$SandersPct - nhdata$ClintonPct
( top )
For this New Hampshire mapping project by county, I downloaded files from the Census Bureau’s Cartographic Boundary shapefiles page — these are smaller, simplified files designed for mapping projects where extraordinarily precise boundaries aren’t needed. (Files for engineering projects or redistricting tend to be considerably larger).
Unzipping the national county file, read in the shapefile for US states and counties:
usgeo <- read_shape(file=usshapefile, as.sf = TRUE)
## Warning: This function is deprecated and has been migrated to github.com/
## mtennekes/oldtmaptools
## Warning in readOGR(dir, base, verbose = FALSE, ...): Z-dimension discarded
Do a quick plot of the shapefile and check its structure:
qtm(usgeo)
(pause to wait for map to render, may take a few seconds)
str(usgeo)
## Classes 'sf' and 'data.frame': 3233 obs. of 10 variables:
## $ STATEFP : Factor w/ 56 levels "01","02","04",..: 1 11 16 37 39 37 28 26 29 10 ...
## $ COUNTYFP: Factor w/ 328 levels "001","003","005",..: 42 76 74 78 78 38 22 137 291 35 ...
## $ COUNTYNS: Factor w/ 3233 levels "00023901","00025441",..: 120 430 738 1911 2024 1880 1399 1373 1490 298 ...
## $ AFFGEOID: Factor w/ 3233 levels "0500000US01001",..: 30 442 844 2189 2302 2158 1669 1589 1764 344 ...
## $ GEOID : Factor w/ 3233 levels "01001","01003",..: 30 442 844 2189 2302 2158 1669 1589 1764 344 ...
## $ NAME : Factor w/ 1921 levels "Abbeville","Acadia",..: 620 592 945 1291 1665 692 320 1683 284 747 ...
## $ LSAD : Factor w/ 11 levels "00","03","04",..: 5 5 5 5 5 5 5 5 1 5 ...
## $ ALAND : Factor w/ 3233 levels "1000508842","1001064387",..: 1199 5 2047 452 1721 2091 1880 1194 2397 1215 ...
## $ AWATER : Factor w/ 3233 levels "0","10017640",..: 1626 414 1940 1718 1118 2724 2916 2228 1613 497 ...
## $ geometry:sfc_MULTIPOLYGON of length 3233; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:9, 1:2] -88.2 -88.2 -88.2 -88.1 -87.5 ...
## ..- attr(*, "class")= chr "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...
Subset just the NH data from the US shapefile
nhgeo <- usgeo[usgeo$STATEFP==nhfipscode,]
or
nhgeo <- dplyr::filter(usgeo, STATEFP == nhfipscode)
tmap test plot of the New Hampshire data
qtm(nhgeo)
( top )
Check the structure of the New Hampshire object
str(nhgeo)
## Classes 'sf' and 'data.frame': 10 obs. of 10 variables:
## $ STATEFP : Factor w/ 56 levels "01","02","04",..: 30 30 30 30 30 30 30 30 30 30
## $ COUNTYFP: Factor w/ 328 levels "001","003","005",..: 6 8 5 1 12 3 14 10 2 15
## $ COUNTYNS: Factor w/ 3233 levels "00023901","00025441",..: 1500 1501 1499 1496 1503 1498 1504 1502 1497 1505
## $ AFFGEOID: Factor w/ 3233 levels "0500000US01001",..: 1769 1770 1768 1765 1772 1767 1773 1771 1766 1774
## $ GEOID : Factor w/ 3233 levels "01001","01003",..: 1769 1770 1768 1765 1772 1767 1773 1771 1766 1774
## $ NAME : Factor w/ 1921 levels "Abbeville","Acadia",..: 684 791 416 138 1469 334 1651 1131 282 1657
## $ LSAD : Factor w/ 11 levels "00","03","04",..: 5 5 5 5 5 5 5 5 5 5
## $ ALAND : Factor w/ 3233 levels "1000508842","1001064387",..: 2494 1831 2526 67 1382 1424 3172 1997 1991 730
## $ AWATER : Factor w/ 3233 levels "0","10017640",..: 55 1942 3097 778 1327 2460 1852 2454 638 1817
## $ geometry:sfc_MULTIPOLYGON of length 10; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:327, 1:2] -72.3 -72.3 -72.3 -72.3 -72.3 ...
## ..- attr(*, "class")= chr "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...
Check if county names are in the same format in both files
str(nhgeo$NAME)
## Factor w/ 1921 levels "Abbeville","Acadia",..: 684 791 416 138 1469 334 1651 1131 282 1657
str(nhdata$County)
## chr [1:10] "Belknap" "Carroll" "Cheshire" "Coos" "Grafton" ...
They’re not. Change the county names to plain characters in nhgeo:
nhgeo$NAME <- as.character(nhgeo$NAME)
Order each data set by county name
nhgeo <- nhgeo[order(nhgeo$NAME),]
nhdata <- nhdata[order(nhdata$County),]
Are the two county columns identical now? They should be:
identical(nhgeo$NAME,nhdata$County )
## [1] TRUE
Merge data with tmaptool’s append_data function
nhmap <- append_data(nhgeo, nhdata, key.shp = "NAME", key.data="County")
## Warning: This function is deprecated and has been migrated to github.com/
## mtennekes/oldtmaptools
## Keys match perfectly.
See the new data structure with
str(nhmap)
## Classes 'sf' and 'data.frame': 10 obs. of 16 variables:
## $ STATEFP : Factor w/ 56 levels "01","02","04",..: 30 30 30 30 30 30 30 30 30 30
## $ COUNTYFP : Factor w/ 328 levels "001","003","005",..: 1 2 3 5 6 8 10 12 14 15
## $ COUNTYNS : Factor w/ 3233 levels "00023901","00025441",..: 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505
## $ AFFGEOID : Factor w/ 3233 levels "0500000US01001",..: 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774
## $ GEOID : Factor w/ 3233 levels "01001","01003",..: 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774
## $ NAME : chr "Belknap" "Carroll" "Cheshire" "Coos" ...
## $ LSAD : Factor w/ 11 levels "00","03","04",..: 5 5 5 5 5 5 5 5 5 5
## $ ALAND : Factor w/ 3233 levels "1000508842","1001064387",..: 67 1991 1424 2526 2494 1831 1997 1382 3172 730
## $ AWATER : Factor w/ 3233 levels "0","10017640",..: 778 638 2460 3097 55 1942 2454 1327 1852 1817
## $ Clinton : num 3495 3230 5132 2013 6918 ...
## $ Sanders : num 6005 5638 12441 3639 14245 ...
## $ SandersMarginVotes : num 2510 2408 7309 1626 7327 ...
## $ SandersPct : num 0.632 0.636 0.708 0.644 0.673 ...
## $ ClintonPct : num 0.368 0.364 0.292 0.356 0.327 ...
## $ SandersMarginPctgPoints: num 0.264 0.272 0.416 0.288 0.346 ...
## $ geometry :sfc_POLYGON of length 10; first list element: List of 1
## ..$ : num [1:33, 1:2] -71.7 -71.7 -71.7 -71.7 -71.7 ...
## ..- attr(*, "class")= chr "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...
( top )
The hard part is done: finding data, getting it into the right format and merging it with geospatial data. Now, creating a simple static map of Sanders’ margins by county in number of votes is easy.
Quick and easy maps as static images with tmap’s qtm() function:
qtm(nhmap, "SandersMarginVotes")
## Some legend labels were too wide. These labels have been resized to 0.62, 0.62, 0.62, 0.57, 0.53. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
qtm(nhmap, "SandersMarginPctgPoints")
For more control over look and feel, use the tm_shape() function:
tm_shape(nhmap) +
tm_fill("SandersMarginVotes", title="Sanders Margin, Total Votes", palette = "PRGn") +
tm_borders(alpha=.5) +
tm_text("NAME", size=0.8) +
tm_style_classic()
## Warning in tm_style_classic(): tm_style_classic is deprecated as of tmap
## version 2.0. Please use tm_style("classic", ...) instead
## Some legend labels were too wide. These labels have been resized to 0.62, 0.62, 0.62, 0.57, 0.53. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
Same code as above, but store the map in a variable:
nhstaticmap <- tm_shape(nhmap) +
tm_fill("SandersMarginVotes", title="Sanders Margin, Total Votes", palette = "PRGn") +
tm_borders(alpha=.5) +
tm_text("NAME", size=0.8) +
tm_style_classic()
## Warning in tm_style_classic(): tm_style_classic is deprecated as of tmap
## version 2.0. Please use tm_style("classic", ...) instead
View the map
nhstaticmap
## Some legend labels were too wide. These labels have been resized to 0.62, 0.62, 0.62, 0.57, 0.53. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
save the map to a jpg file with tmap’s save_tmap():
save_tmap(nhstaticmap, filename="nhdemprimary.jpg")
## Warning in save_tmap(nhstaticmap, filename = "nhdemprimary.jpg"): save_tmap
## is deprecated as of tmap version 2.0. Please use tmap_save instead
## Map saved to /home/dutky/mcData110/week9Gis/nhdemprimary.jpg
## Resolution: 1501.336 by 2937.385 pixels
## Size: 5.004452 by 9.791282 inches (300 dpi)
( top )
Next up: Code for a basic interactive map, this time for Clinton percentages in NH
First, create a palette
clintonPalette <- colorNumeric(palette = "Blues", domain=nhmap$ClintonPct)
and a pop-up window
library(scales)
nhpopup <- paste0("<b>County: ", nhmap$NAME, "</b><br />Sanders ", percent(nhmap$SandersPct), " - Clinton ", percent(nhmap$ClintonPct))
Rename the county column from NAME to County with dplyr’s rename function:
nhmap <- rename(nhmap, County = NAME)
( top )
Now the interactive map:
leaflet(nhmap) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(stroke=FALSE,
smoothFactor = 0.2,
fillOpacity = .8,
popup=nhpopup,
color= ~clintonPalette(nhmap$ClintonPct)
)
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs).
## Need '+proj=longlat +datum=WGS84'
re-project
nhmap_projected <- sf::st_transform(nhmap, "+proj=longlat +datum=WGS84")
leaflet(nhmap_projected) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(stroke=FALSE,
smoothFactor = 0.2,
fillOpacity = .8,
popup=nhpopup,
color= ~clintonPalette(nhmap$ClintonPct)
)
scdata <- rio::import(scdatafile)
South Carolina shapefile:
scgeo <- dplyr::filter(usgeo, STATEFP==scfipscode)
Quick plot of scgeo SC geospatial object:
qtm(scgeo)
Add a column with percent of votes for each candidate. Candidates are in columns 2-7:
candidates <- colnames(scdata[2:7])
for(i in 2:7){
j = i + 7
temp <- scdata[[i]] / scdata$Total
scdata[[j]] <- temp
colnames(scdata)[j] <- paste0(colnames(scdata)[i], "Pct")
}
Get winner in each precinct
for(i in 1:nrow(scdata)){
scdata$winner[i] <- names(which.max(scdata[i,2:7]))
}
Import spreadsheet with percent of adult population holding at least a 4-yr college degree
sced <- rio::import("data/SCdegree.xlsx")
Check if county names are in the same format in both files
str(scgeo$NAME)
## Factor w/ 1921 levels "Abbeville","Acadia",..: 554 993 810 35 1073 523 1662 359 100 331 ...
str(scdata$County)
## chr [1:46] "Abbeville" "Aiken" "Allendale" "Anderson" "Bamberg" ...
Change the county names to plain characters in scgeo:
scgeo$NAME <- as.character(scgeo$NAME)
Order each data set by county name
scgeo <- scgeo[order(scgeo$NAME),]
scdata <- scdata[order(scdata$County),]
Are the two county columns identical now? They should be:
identical(scgeo$NAME,scdata$County )
## [1] TRUE
Add the election results and rename county column
scmap <- append_data(scgeo, scdata, key.data = "County", key.shp = "NAME")
## Warning: This function is deprecated and has been migrated to github.com/
## mtennekes/oldtmaptools
## Keys match perfectly.
scmap <- rename(scmap, County = NAME)
scmap <- append_data(scmap, sced, key.shp = "County", key.data = "County")
## Warning: This function is deprecated and has been migrated to github.com/
## mtennekes/oldtmaptools
## Keys match perfectly.
( top )
Instead of just coloring the winner, let’s color by strength of win with multiple layers Use same intensity for all - get minimum and maximum for the top 3 combined
minpct <- min(c(scmap$Donald.J.TrumpPct, scmap$Marco.RubioPct , scmap$Ted.CruzPct))
maxpct <- max(c(scmap$Donald.J.TrumpPct, scmap$Marco.RubioPct , scmap$Ted.CruzPct))
Create leaflet palettes for each layer of the map:
winnerPalette <- colorFactor(palette=c("#984ea3", "#e41a1c"), domain = scmap$winner)
trumpPalette <- colorNumeric(palette = "Purples", domain=c(minpct, maxpct))
rubioPalette <- colorNumeric(palette = "Reds", domain = c(minpct, maxpct))
cruzPalette <- colorNumeric(palette = "Oranges", domain = c(minpct, maxpct))
edPalette <- colorNumeric(palette = "Blues", domain=scmap$PctCollegeDegree)
Create a pop-up:
scpopup <- paste0("<b>County: ", scmap$County, "<br />Winner: ", scmap$winner, "</b><br /><br />Trump: ", percent(scmap$Donald.J.TrumpPct), "<br />Rubio: ", percent(scmap$Marco.RubioPct), "<br />Cruz: ", percent(scmap$Ted.CruzPct), "<br /><br />Pct w college ed: ", scmap$PctCollegeDegree, "% vs state-wide avg of 25%")
Add the projection we know from the NH map we’ll need for this data on a Leaflet map:
scmap <- sf::st_transform(scmap, "+proj=longlat +datum=WGS84")
Basic interactive map showing winner in each county:
leaflet(scmap) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~winnerPalette(scmap$winner),
group="Winners"
) %>%
addLegend(position="bottomleft", colors=c("#984ea3", "#e41a1c"), labels=c("Trump", "Rubio"))
( top )
Put top 3 candidates in their own layers and add education layer, store in scGOPmap2 variable
scGOPmap2 <- leaflet(scmap) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~winnerPalette(scmap$winner),
group="Winners"
) %>%
addLegend(position="bottomleft", colors=c("#984ea3", "#e41a1c"), labels=c("Trump", "Rubio")) %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~trumpPalette(scmap$Donald.J.TrumpPct),
group="Trump"
) %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~rubioPalette(scmap$Marco.RubioPct),
group="Rubio"
) %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~cruzPalette(scmap$Ted.CruzPct),
group="Cruz"
) %>%
addPolygons(stroke=TRUE,
weight=1,
smoothFactor = 0.2,
fillOpacity = .75,
popup=scpopup,
color= ~edPalette(scmap$PctCollegeDegree),
group="College degs"
) %>%
addLayersControl(
baseGroups=c("Winners", "Trump", "Rubio", "Cruz", "College degs"),
position = "bottomleft",
options = layersControlOptions(collapsed = FALSE)
) %>%
addSearchOSM()
View the scGOPmap2 map:
print(scGOPmap2)
( top )
save as a self-contained HTML file RPubs link
htmlwidgets::saveWidget(scGOPmap2, file="scGOPwidget2.html")
save as an HTML file with dependencies in another directory:
htmlwidgets::saveWidget(widget=scGOPmap2, file="scGOPprimary_withdependencies.html", selfcontained=FALSE, libdir = "js")
( top )