GIS Assignment

Author

Arif

nhdatafilecsv <- "NHD2016.csv"
usshapefile <- "cb_2014_us_county_5m/cb_2014_us_county_5m.shp"
nhfipscode <- "33"
scdatafile <- "SCGOP2016.csv"
scfipscode <- "45"

Load the tmap, tmaptools, and leaflet packages into your working session:

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tmap)
The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, will retire in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
The sp package is now running under evolution status 2
     (status 2 uses the sf package in place of rgdal)
library(tmaptools)
library(leaflet)
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(leaflet.extras)
library(rio)
library(sp)

Step 1: Read in the NH election results file:

setwd("/Users/arifjadji/Desktop/DATA110/DATA110 Datasets/GIS")
nhdata <- import(nhdatafilecsv)

Eliminate columns for minor candidates and just use County, Clinton and Sanders columns:

nhdata <- nhdata[,c("County", "Clinton", "Sanders")]

Step 2: Decide what data to map ##Add columns for percents and margins:

nhdata$SandersMarginVotes <- nhdata$Sanders - nhdata$Clinton
nhdata$SandersPct <- (nhdata$Sanders) / (nhdata$Sanders + nhdata$Clinton) 
# Will use formatting later to multiply by a hundred
nhdata$ClintonPct <- (nhdata$Clinton) / (nhdata$Sanders + nhdata$Clinton)
nhdata$SandersMarginPctgPoints <- nhdata$SandersPct - nhdata$ClintonPct

Step 3: Get geographic data files ##Read in the shapefile for US states and counties:

library(raster)

Attaching package: 'raster'
The following object is masked from 'package:dplyr':

    select
library(rgdal)
Please note that rgdal will be retired during October 2023,
plan transition to sf/stars/terra functions using GDAL and PROJ
at your earliest convenience.
See https://r-spatial.org/r/2023/05/15/evolution4.html and https://github.com/r-spatial/evolution
rgdal: version: 1.6-7, (SVN revision 1203)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 3.4.2, released 2022/03/08
Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/gdal
GDAL binary built with GEOS: FALSE 
Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/proj
PROJ CDN enabled: FALSE
Linking to sp version:1.6-1
To mute warnings of possible GDAL/OSR exportToProj4() degradation,
use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
setwd("/Users/arifjadji/Desktop/DATA110/DATA110 Datasets/GIS")
usgeo <- shapefile("cb_2014_us_county_5m/cb_2014_us_county_5m.shp")
Warning: [vect] Z coordinates ignored

Do a quick plot (qtm stands for quick thematic map) of the shapefile and check its structure:

tmap_options(check.and.fix = TRUE)
qtm(usgeo)

Subset just the NH data from the US shapefile

nhgeo <- usgeo[usgeo$STATEFP==nhfipscode,]

tmap test plot of the New Hampshire data

qtm(nhgeo)

Match the two dataset variable headers for county (One is NAME and the other is County)

# Check if county names are in the same format in both files
str(nhgeo$NAME)
 chr [1:10] "Grafton" "Hillsborough" "Coos" "Belknap" "Rockingham" ...
str(nhdata$County)
 chr [1:10] "Belknap" "Carroll" "Cheshire" "Coos" "Grafton" "Hillsborough" ...
nhgeo$NAME  # they are the same
 [1] "Grafton"      "Hillsborough" "Coos"         "Belknap"      "Rockingham"  
 [6] "Cheshire"     "Strafford"    "Merrimack"    "Carroll"      "Sullivan"    

Order each data set by county name (alphabetically)

nhgeo <- nhgeo[order(nhgeo$NAME),]
nhdata <- nhdata[order(nhdata$County),]
if (identical(nhgeo$NAME,nhdata$County)) {
nhmap <- merge(nhgeo, nhdata, by.x = "NAME", by.y = "County")  # Merge geo and vote datasets
} else {stop}

Step 4: Merge geo data with results data using the merge function

# See the new data structure with
head(nhmap)
          NAME STATEFP COUNTYFP COUNTYNS       AFFGEOID GEOID LSAD       ALAND
1      Belknap      33      001 00873174 0500000US33001 33001   06  1036582289
2      Carroll      33      003 00873175 0500000US33003 33003   06 -1883508361
3     Cheshire      33      005 00873176 0500000US33005 33005   06  1830366195
4         Coos      33      007 00873177 0500000US33007 33007   06   353249502
5      Grafton      33      009 00873178 0500000US33009 33009   06   130959956
6 Hillsborough      33      011 00873179 0500000US33011 33011   06 -2025747080
     AWATER Clinton Sanders SandersMarginVotes SandersPct ClintonPct
1 177039345    3495    6005               2510  0.6321053  0.3678947
2 158933434    3230    5638               2408  0.6357691  0.3642309
3  57990901    5132   12441               7309  0.7079611  0.2920389
4  90773891    2013    3639               1626  0.6438429  0.3561571
5 105375486    6918   14245               7327  0.6731087  0.3268913
6  41604851   28147   39245              11098  0.5823392  0.4176608
  SandersMarginPctgPoints
1               0.2642105
2               0.2715381
3               0.4159222
4               0.2876858
5               0.3462175
6               0.1646783

Step 5: Create a static map with tmap’s qtm() function:

qtm(nhmap, "SandersMarginVotes")
Some legend labels were too wide. These labels have been resized to 0.63, 0.63, 0.63, 0.58, 0.54. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.

qtm(nhmap, "SandersMarginPctgPoints")

For more control over look and feel, use the tm_shape() function:

tm_shape(nhmap) +
  tm_fill("SandersMarginPctgPoints", title="Sanders Margin, Total Votes", palette = "PRGn") +
  tm_borders(alpha=.5) +
  tm_text("NAME", size=0.8)

Same code as above, but store the static map in a variable, and change the theme to “classic” style:

nhstaticmap <- tm_shape(nhmap) +
tm_fill("SandersMarginPctgPoints", title="Sanders Margin, Total Votes", palette = "PRGn") + #I like viridis
tm_borders(alpha=.5) +
tm_text("NAME", size=0.8) + 
tm_style("classic")

View the map

nhstaticmap

Part 6 Next up: Code for a basic interactive map, this time for Clinton percentages in NH

Create a palette

clintonPalette <- colorNumeric(palette = "Blues", domain=nhmap$ClintonPct)  # color palette for Clinton

and a pop-up window

library(scales)

Attaching package: 'scales'
The following object is masked from 'package:purrr':

    discard
The following object is masked from 'package:readr':

    col_factor
nhpopup <- paste0("County: ", nhmap$NAME, "<br /><br /> Sanders:  ", percent(nhmap$SandersPct), " Clinton: ", percent(nhmap$ClintonPct))  # popup content

Step 7: Now generate the interactive map:

# add the appropriate projection, WGS84
nhmap_projected <- spTransform(nhmap, "+proj=longlat +datum=WGS84")  
leaflet(nhmap_projected) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(stroke=FALSE, 
              smoothFactor = 0.2, 
              fillOpacity = .8, 
              popup=nhpopup,
              color= ~clintonPalette(nhmap$ClintonPct))

South Carolina data

setwd("/Users/arifjadji/Desktop/DATA110/DATA110 Datasets/GIS")
scdata <- rio::import(scdatafile)

South Carolina shapefile and Quick plot of scgeo SC geospatial object:

scgeo <- usgeo[usgeo@data$STATEFP=="45",]
qtm(scgeo)

Add a column with percent of votes for each candidate. Candidates are in columns 2-7:

candidates <- colnames(scdata[2:7])
for(i in 2:7){
  j = i + 7
  temp <- scdata[[i]] / scdata$Total
  scdata[[j]] <- temp
  colnames(scdata)[j] <- paste0(colnames(scdata)[i], "Pct")
}  
winner <- colnames(scdata[2:7])

Get winner in each precinct

for(i in 1:nrow(scdata)){
  scdata$winner[i] <- names(which.max(scdata[i,2:7]))
}

Import spreadsheet with percent of adult population holding at least a 4-yr college degree

setwd("/Users/arifjadji/Desktop/DATA110/DATA110 Datasets/GIS")
sced <- rio::import("SCdegree.xlsx")

Check if county names are in the same format in both files

# Change the county names to plain characters in scgeo:
scgeo$NAME <- as.character(scgeo$NAME)
# Order each data set by county name
scgeo <- scgeo[order(scgeo$NAME),]
scdata <- scdata[order(scdata$County),]
# check name and county columns are identical
if (identical(scgeo$NAME,scdata$County )) {
scmap <- merge(scgeo, scdata, by.x = "NAME", by.y = "County")
} else {stop}

str(scgeo$NAME)
 chr [1:46] "Abbeville" "Aiken" "Allendale" "Anderson" "Bamberg" "Barnwell" ...
str(scdata$County)
 chr [1:46] "Abbeville" "Aiken" "Allendale" "Anderson" "Bamberg" "Barnwell" ...

Instead of just coloring the winner, let’s color by strength of win with multiple layers

# Use same intensity for all - get minimum and maximum for the top 3 combined
minpct <- min(c(scdata$`Donald J TrumpPct`, scdata$`Marco RubioPct`, scdata$`Ted CruzPct`))
maxpct <- max(c(scdata$`Donald J TrumpPct`, scdata$`Marco RubioPct`, scdata$`Ted CruzPct`))

Create leaflet palettes for each layer of the map:

trumpPalette <- colorNumeric(palette = "Purples", domain=c(minpct, maxpct))
rubioPalette <- colorNumeric(palette = "Reds", domain = c(minpct, maxpct))
cruzPalette <- colorNumeric(palette = "Oranges", domain = c(minpct, maxpct))

winnerPalette <- colorFactor(palette=c("#984ea3", "#e41a1c"), domain = scmap$winner)
edPalette <- colorNumeric(palette = "Blues", domain=scmap$PctCollegeDegree)

Create a pop-up:

scpopup <- paste0("<b>County: ", scmap$NAME, "<br />Winner: ", scmap$winner, "</b><br /><br />Trump: ", percent(scmap$`Donald J TrumpPct`), "<br />Rubio: ", percent(scmap$`Marco RubioPct`), "<br />Cruz: ", percent(scmap$`Ted CruzPct`), "<br /><br />Pct w college ed: ", sced$PctCollegeDegree, "% vs state-wide avg of 25%")

Add the projection we know from the NH map we’ll need for this data on a Leaflet map:

scmap <- sp::spTransform(scmap, "+proj=longlat +datum=WGS84")

Basic interactive map showing winner in each county:

leaflet(scmap) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(stroke=TRUE,
              weight=1,
              smoothFactor = 0.2,
              fillOpacity = .75,
              popup=scpopup, 
              color= ~winnerPalette(scmap$winner),
              group="Winners" ) %>%
    addLegend(position="bottomleft", colors=c("#984ea3", "#e41a1c"), labels=c("Trump", "Rubio"))

Put top 3 candidates in their own layers and add education layer, store in scGOPmap2 variable

scGOPmap <- leaflet(scmap) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(stroke=TRUE,
              weight=1,
              smoothFactor = 0.2,
              fillOpacity = .75,
              popup=scpopup, 
              color= ~winnerPalette(scmap$winner),
              group="Winners"  ) %>% 
    addLegend(position="bottomleft", colors=c("#984ea3", "#e41a1c"), labels=c("Trump", "Rubio"))  %>%

  addPolygons(stroke=TRUE,
     weight=1,
     smoothFactor = 0.2, 
     fillOpacity = .75, 
     popup=scpopup, 
     color= ~trumpPalette(scmap$`Donald J TrumpPct`),
     group="Trump") %>%

  addPolygons(stroke=TRUE,
              weight=1,
              smoothFactor = 0.2, 
              fillOpacity = .75, 
              popup=scpopup, 
              color= ~rubioPalette(scmap$`Marco RubioPct`),
              group="Rubio") %>%

  addPolygons(stroke=TRUE,
              weight=1,
              smoothFactor = 0.2, 
              fillOpacity = .75, 
              popup=scpopup, 
              color= ~cruzPalette(scmap$`Ted CruzPct`),
              group="Cruz") %>%

  addPolygons(stroke=TRUE,
              weight=1,
              smoothFactor = 0.2, 
              fillOpacity = .75, 
              popup=scpopup, 
              color= ~edPalette(sced$PctCollegeDegree), #this data is in the sced table, not scmaps
              group="College degs") %>%

  addLayersControl(
      baseGroups=c("Winners", "Trump", "Rubio", "Cruz", "College degs"),
      position = "bottomleft",
      options = layersControlOptions(collapsed = FALSE))

# Now display the map
scGOPmap