While working as a postdoc at The University of Chicago, I did a side-project for a professor, looking to quantify landcover types by county using the National Landcover Database across multiple years. The following is a script that I developed to visualize the dataset we developed from that collaboration.

library(rgdal)
## Warning: package 'rgdal' was built under R version 3.2.5
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.2.5
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3
library(maptools)
## Checking rgeos availability: TRUE
library(raster)
## Warning: package 'raster' was built under R version 3.2.5
library(ggplot2)
library(RColorBrewer)

I used a quartile split custom function that I found on stack overflow to later process the landcover data for discretizing the coloration of the chloropleth. The custom function can from a comment from Hadley Wickam (though I can’t find the original post).

# function to quantize data - Found on stackexchange, written by Hadley
qcut <- function(x, n) {
  cut(x, quantile(x, seq(0, 1, length = n + 1)), labels = seq_len(n),
      include.lowest = T)
}
# load filepaths
county.all.fp <- '~/Documents/berman/data/cb_2014_us_county_20m/'
mydata.fp <- '~/Documents/berman/data/allstate_cover.csv'

# load all state value data
mydata <- read.csv(mydata.fp, header = T, sep = ',', stringsAsFactors = F)

mydata$countygeoid <- formatC(mydata$countygeoid, digits = 5, width = 5, flag = 0)
# load county shapefile
setwd(county.all.fp)
county.all.shp <- readOGR(".","cb_2014_us_county_20m")
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "cb_2014_us_county_20m"
## with 3220 features
## It has 9 fields
## Warning in readOGR(".", "cb_2014_us_county_20m"): Z-dimension discarded

This is what our landcover data look like:

head(mydata)
##   countyname countygeoid statefp openwater snowice devopen devlow devmed
## 1    Autauga       01001       1     1.280       0   4.155  1.370  0.436
## 2    Baldwin       01003       1     5.883       0   4.833  1.785  0.687
## 3    Barbour       01005       1     2.901       0   3.432  0.759  0.212
## 4       Bibb       01007       1     0.704       0   3.533  0.417  0.157
## 5     Blount       01009       1     0.969       0   4.722  1.314  0.308
## 6    Bullock       01011       1     1.159       0   3.249  0.459  0.073
##   devhigh barren desidueous evergreen mixedforest shrubscrub herbaceuous
## 1   0.117  0.274     17.583    18.998      13.258     17.910       1.730
## 2   0.208  0.609      0.493    23.697       2.223     12.659       5.137
## 3   0.065  0.141     20.360    27.359       8.543     16.737       2.420
## 4   0.035  0.289     27.060    28.885      13.203     11.732       3.749
## 5   0.082  0.467     32.951    10.193       6.101      5.973       4.577
## 6   0.019  0.084     17.885    25.205      11.978     14.873       1.769
##   haypasture cultivated woodywetlands emergentherbwetlands
## 1     11.603      5.947         4.944                0.396
## 2      5.723      9.491        24.503                2.069
## 3      6.829      5.503         4.469                0.270
## 4      5.426      0.579         4.038                0.193
## 5     27.043      4.565         0.688                0.047
## 6     10.759      3.678         8.025                0.783

Where landcover is presented as the percent total coverage of the county.

This is what the base-map looks like

plot(county.all.shp)

We only have data for the contiguous U.S. so we’ll remove the extra states/territories, which will make the map much more presentable.

# transform shapefile to match projection of landcover projection
# county.all.shp <- spTransform(county.all.shp, coverraster.r@crs)
# remove states and territories not in landcover map
# 02 = Alaska; 11 = D.C.; 15 = Hawaii ; 72 = Puerto Rico
states.rm      <- c("02","11","15","72") 
county.all.shp <- county.all.shp[!as.character(county.all.shp$STATEFP)
                                 %in% states.rm, ]
# sort county data by statefip then countyfip
county.all.shp <- county.all.shp[order(county.all.shp$GEOID),]
plot(county.all.shp)

Much better. The journey to pretty chloropleths continues…

# in my current script this command is necessary. The warning can be ignored (if you're from the future when support for gpclibPermit has been withdrawn, my apologies)
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
# fortify county shape data to make into data.frame
tract_geom <- fortify(model = county.all.shp, region="GEOID")
# tract_geom$id <- tract_geom$GEOID
# merge county and land cover value data
tract_poly <- merge(mydata,tract_geom, by.x = "countygeoid", by.y = "id", all = F)
tract_poly <- tract_poly[order(tract_poly$order),]

Now the hard work is done, we want to modify landcover data so that it is easier to present.

#combine similar categories
tract_poly$devall <- rowSums(data.frame(tract_poly$devopen,tract_poly$devlow,
                                        tract_poly$devmed,tract_poly$devhigh))

# quantize landcover for easier visualization
tract_poly$desidueous_q <- qcut(tract_poly$desidueous, 8)
tract_poly$evergreen_q  <- qcut(tract_poly$evergreen, 8)

tract_poly$haypasture_q <- qcut(tract_poly$haypasture, 8)
tract_poly$cultivated_q <- qcut(tract_poly$cultivated, 8)

tract_poly$devhigh_q    <- qcut(tract_poly$devhigh, 8)
tract_poly$devall_q     <- qcut(tract_poly$devall, 8)

tract_poly$openwater_q     <- qcut(tract_poly$openwater, 8)

Here is a plot of hay/pasture in the U.S. (a good proxy for rural land)

plot.haypasture <- ggplot(tract_poly, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", size = 0.5, aes(fill = haypasture_q)) +
  scale_fill_manual(values = brewer.pal(8,"Oranges")) +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position="none")

plot.haypasture

Here’s what we were actually going for in the project. A coarse quantification of ‘green cover’ in the U.S. by count. Evergreen Forrest landcover.

plot.evergreen <- ggplot(tract_poly, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", size = 0.5, aes(fill = evergreen_q)) +
  scale_fill_manual(values = brewer.pal(8,"Greens")) +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position="none")

plot.evergreen

And for good measure, all categories of developed land, whith developed land in yellows and undeveloped lands in blues, which is a wonderful contrast to the evergreen plot:

#combine similar categories
tract_poly$devall <- rowSums(data.frame(tract_poly$devopen,tract_poly$devlow,
                                        tract_poly$devmed,tract_poly$devhigh))

plot.devall <- ggplot(tract_poly, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", size = 0.1, alpha=.8, aes(fill = devall_q)) +
  scale_fill_manual(values = rev(brewer.pal(8,"YlGnBu"))) +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position="none")

plot.devall