Reading the dataset

# Load the county-level results workbook and preview its columns.
nhdata <- rio::import(file = "NHD2016.xlsx")
nhdata %>%
  glimpse()
## Rows: 10
## Columns: 29
## $ County           <chr> "Belknap", "Carroll", "Cheshire", "Coos", "Grafton...
## $ Adams            <dbl> 4, 2, 3, 1, 4, 13, 4, 17, 3, 2
## $ Burke            <dbl> 4, 3, 7, 6, 9, 35, 7, 20, 9, 7
## $ Clinton          <dbl> 3495, 3230, 5132, 2013, 6918, 28147, 12250, 22829,...
## $ `De La Fuente`   <dbl> 5, 4, 8, 4, 4, 36, 6, 19, 7, 2
## $ Elbot            <dbl> 1, 2, 4, NA, 1, 12, 3, 12, 1, NA
## $ French           <dbl> 1, 2, 2, NA, 3, 6, 5, 7, 2, 1
## $ Greenstein       <dbl> NA, 2, 2, 1, 1, 7, 6, 3, 6, 1
## $ Hewes            <dbl> NA, NA, 1, NA, 1, 10, 2, 4, NA, NA
## $ Hutton           <dbl> NA, NA, NA, NA, NA, 5, 1, 3, 4, 1
## $ Judd             <dbl> 2, 1, 4, 1, 2, 13, 6, 9, 4, 2
## $ Kelso            <dbl> 4, 1, 3, NA, 2, 12, 3, 14, 3, 4
## $ Lipscomb         <dbl> NA, NA, 2, 1, 2, 2, 2, 4, NA, 1
## $ Locke            <dbl> 2, 1, 1, NA, 1, 5, 8, 9, 3, 2
## $ Lovitt           <dbl> 2, 2, 1, 1, 1, 4, 3, 4, 2, 1
## $ `McGaughey, Jr.` <dbl> NA, 2, 1, 1, NA, 4, 2, 8, 1, NA
## $ Moroz            <dbl> 1, NA, NA, NA, 2, 24, NA, NA, NA, NA
## $ `O'Donnell, Jr.` <dbl> 2, 5, 1, 1, NA, 11, 3, 2, 1, NA
## $ `O'Malley`       <dbl> 35, 20, 42, 20, 41, 202, 78, 123, 72, 27
## $ Sanders          <dbl> 6005, 5638, 12441, 3639, 14245, 39245, 18107, 3106...
## $ Schwass          <dbl> 6, 4, 5, 4, 5, 32, 21, 41, 20, 4
## $ Sloan            <dbl> 2, 1, 2, 1, NA, 4, 1, 2, NA, 1
## $ Sonnino          <dbl> NA, NA, 1, 2, NA, 6, 4, 3, 1, NA
## $ Steinberg        <dbl> NA, NA, 2, NA, 1, 11, 4, NA, 2, 1
## $ Supreme          <dbl> 10, 11, 14, 1, 9, 82, 39, 54, 31, 14
## $ Thistle          <dbl> 12, 6, 6, 10, 14, 91, 24, 40, 15, 5
## $ Valentine        <dbl> 2, NA, 1, NA, NA, 13, 1, 3, 2, 2
## $ Weil             <dbl> NA, NA, 1, NA, NA, 4, 3, NA, NA, NA
## $ Wolfe            <dbl> 3, NA, 6, NA, 5, 17, 4, 12, 6, 1

Choosing the data for only two main candidates

# Keep the county name plus the vote counts of the two main candidates;
# every other candidate column is dropped.
nhdata <- nhdata[c("County", "Clinton", "Sanders")]
nhdata %>%
  glimpse()
## Rows: 10
## Columns: 3
## $ County  <chr> "Belknap", "Carroll", "Cheshire", "Coos", "Grafton", "Hills...
## $ Clinton <dbl> 3495, 3230, 5132, 2013, 6918, 28147, 12250, 22829, 8813, 2497
## $ Sanders <dbl> 6005, 5638, 12441, 3639, 14245, 39245, 18107, 31065, 15881,...

Adding columns for percents and margins of votes

# Derive the vote margin and each candidate's share of the two-way vote.
# Shares are fractions (0-1) of the combined Sanders + Clinton total;
# multiply by 100 when formatting them as percentages.
# NOTE: printed values shown elsewhere in this document for the Pct and
# PctgPoints columns predate this fix; re-render to refresh them.
nhdata <- nhdata %>%
  mutate(
    SandersMarginVotes = Sanders - Clinton,
    # A share is the candidate's own votes over the two-candidate total.
    # Dividing the margin by the total (as was done before) made
    # ClintonPct negative and doubled the margin computed below.
    SandersPct = Sanders / (Sanders + Clinton),
    ClintonPct = Clinton / (Sanders + Clinton),
    # Difference of the two shares, equal to SandersMarginVotes / total.
    SandersMarginPctgPoints = SandersPct - ClintonPct
  )
glimpse(nhdata)
## Rows: 10
## Columns: 7
## $ County                  <chr> "Belknap", "Carroll", "Cheshire", "Coos", "...
## $ Clinton                 <dbl> 3495, 3230, 5132, 2013, 6918, 28147, 12250,...
## $ Sanders                 <dbl> 6005, 5638, 12441, 3639, 14245, 39245, 1810...
## $ SandersMarginVotes      <dbl> 2510, 2408, 7309, 1626, 7327, 11098, 5857, ...
## $ SandersPct              <dbl> 0.2642105, 0.2715381, 0.4159222, 0.2876858,...
## $ ClintonPct              <dbl> -0.2642105, -0.2715381, -0.4159222, -0.2876...
## $ SandersMarginPctgPoints <dbl> 0.5284211, 0.5430762, 0.8318443, 0.5753715,...

Reading the shape file for the geographic data

# Read the US county boundary shapefile into an sf object.
usgeo <- st_read(dsn = "cb_2014_us_county_5m.shp")
## Reading layer `cb_2014_us_county_5m' from data source `C:\Users\OCHO3\OneDrive - montgomerycollege.edu\0 Math\0 Teaching\DATA Science\DATA 110\GIS Lab\cb_2014_us_county_5m.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XYZ
## bbox:           xmin: -179.1473 ymin: -14.55255 xmax: 179.7785 ymax: 71.35256
## z_range:        zmin: 0 zmax: 0
## geographic CRS: NAD83
# Inspect the attribute columns and geometry of the county layer.
usgeo %>%
  str()
## Classes 'sf' and 'data.frame':   3233 obs. of  10 variables:
##  $ STATEFP : chr  "01" "13" "19" "40" ...
##  $ COUNTYFP: chr  "059" "111" "109" "115" ...
##  $ COUNTYNS: chr  "00161555" "00351094" "00465243" "01101845" ...
##  $ AFFGEOID: chr  "0500000US01059" "0500000US13111" "0500000US19109" "0500000US40115" ...
##  $ GEOID   : chr  "01059" "13111" "19109" "40115" ...
##  $ NAME    : chr  "Franklin" "Fannin" "Kossuth" "Ottawa" ...
##  $ LSAD    : chr  "06" "06" "06" "06" ...
##  $ ALAND   : num  1.64e+09 1.00e+09 2.52e+09 1.22e+09 2.13e+09 ...
##  $ AWATER  : num  32904833 13560697 4154722 35708892 22356541 ...
##  $ geometry:sfc_MULTIPOLYGON of length 3233; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:9, 1:3] -88.2 -88.2 -88.2 -88.1 -87.5 ...
##   ..- attr(*, "class")= chr [1:3] "XYZ" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:9] "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...

Extracting and mapping New Hampshire geographic data using FIPS code 33

# Keep only the counties whose state FIPS code is "33" (New Hampshire),
# then draw a quick map to confirm the subset.
nhgeo <- usgeo %>%
  filter(STATEFP == "33")
qtm(shp = nhgeo)

Sorting both data sets by county name and checking if the two county columns are identical

# Put both tables in the same county-name order.
nhgeo <- nhgeo[order(nhgeo[["NAME"]]), ]
nhdata <- nhdata[order(nhdata[["County"]]), ]

# TRUE means the county names match exactly, element by element.
identical(nhgeo[["NAME"]], nhdata[["County"]])
## [1] TRUE

Joining the two data sets

# Attach the election results to the county geometries, matching the
# geography's NAME column to the results' County column.
nhmap <- merge(
  x = nhgeo,
  y = nhdata,
  by.x = "NAME",
  by.y = "County"
)
nhmap %>%
  str()
## Classes 'sf' and 'data.frame':   10 obs. of  16 variables:
##  $ NAME                   : chr  "Belknap" "Carroll" "Cheshire" "Coos" ...
##  $ STATEFP                : chr  "33" "33" "33" "33" ...
##  $ COUNTYFP               : chr  "001" "003" "005" "007" ...
##  $ COUNTYNS               : chr  "00873174" "00873175" "00873176" "00873177" ...
##  $ AFFGEOID               : chr  "0500000US33001" "0500000US33003" "0500000US33005" "0500000US33007" ...
##  $ GEOID                  : chr  "33001" "33003" "33005" "33007" ...
##  $ LSAD                   : chr  "06" "06" "06" "06" ...
##  $ ALAND                  : num  1.04e+09 2.41e+09 1.83e+09 4.65e+09 4.43e+09 ...
##  $ AWATER                 : num  1.77e+08 1.59e+08 5.80e+07 9.08e+07 1.05e+08 ...
##  $ Clinton                : num  3495 3230 5132 2013 6918 ...
##  $ Sanders                : num  6005 5638 12441 3639 14245 ...
##  $ SandersMarginVotes     : num  2510 2408 7309 1626 7327 ...
##  $ SandersPct             : num  0.264 0.272 0.416 0.288 0.346 ...
##  $ ClintonPct             : num  -0.264 -0.272 -0.416 -0.288 -0.346 ...
##  $ SandersMarginPctgPoints: num  0.528 0.543 0.832 0.575 0.692 ...
##  $ geometry               :sfc_MULTIPOLYGON of length 10; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:33, 1:3] -71.7 -71.7 -71.7 -71.7 -71.7 ...
##   ..- attr(*, "class")= chr [1:3] "XYZ" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:15] "NAME" "STATEFP" "COUNTYFP" "COUNTYNS" ...

Creating a simple static map of Sanders’ margins by county in number of votes and mapping margins by percentage

# Quick thematic map: counties shaded by Sanders' margin in votes.
qtm(shp = nhmap, fill = "SandersMarginVotes")

# Same map, shaded by the SandersMarginPctgPoints column instead.
qtm(shp = nhmap, fill = "SandersMarginPctgPoints")

Customizing the map: color palette, legend title, borders, and county labels

# Layered map: fill by vote margin with the "PRGn" palette, add
# semi-transparent county borders, and label each county by name.
tm_shape(nhmap) +
  tm_fill(
    "SandersMarginVotes",
    title = "Sanders Margin, Total Votes",
    palette = "PRGn"
  ) +
  tm_borders(alpha = 0.5) +
  tm_text("NAME", size = 0.8)

Applying the "classic" map style and changing the background color

# Same layered map as before (fill, borders, county labels), now drawn
# with the "classic" style and a light blue background.
tm_shape(nhmap) +
  tm_fill(
    "SandersMarginVotes",
    title = "Sanders Margin, Total Votes",
    palette = "PRGn"
  ) +
  tm_borders(alpha = 0.5) +
  tm_text("NAME", size = 0.8) +
  tm_style("classic", bg.color = "lightblue")

The END