Mapping in R-Studio

Notes from R Data Visualization by Brooke Anderson on Coursera

All of the packages used here:

library(magrittr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(viridis)
library(readr)
library(ggmap)
library(gridExtra)
library(choroplethr)
library(choroplethrMaps)
library(tigris)
library(sp)
library(stringr)
library(GISTools)
library(raster)
library(grid)
setwd("C:/Users/Steve/Data-Science-Toolbox/Advanced_R")

Basic ggplot2 mapping

Lines

us_map <- map_data("state")

us_map %>% 
        filter(region %in% c("north carolina", "south carolina")) %>% 
        ggplot(aes(x = long, y = lat, group = group)) + 
        geom_path()

Filled States

us_map %>% 
        filter(region %in% c("north carolina", "south carolina")) %>%
        ggplot(aes(x = long, y = lat, group = group)) +
        geom_polygon(fill = "lightblue", color = "black")

No background

us_map %>% 
        filter(region %in% c("north carolina", "south carolina")) %>%
        ggplot(aes(x = long, y = lat, group = group)) +
        geom_polygon(fill = "lightblue", color = "black") + 
        theme_void()

All states

us_map %>% 
        ggplot(aes(x = long, y = lat, group = group)) +
        geom_polygon(fill = "lightblue", color = "black") + 
        theme_void()

Voting example

data(votes.repub)

votes.repub %>%
        tbl_df() %>%
        mutate(state = rownames(votes.repub),
               state = tolower(state)) %>%
        right_join(us_map, by = c("state" = "region")) %>%
        ggplot(aes(x = long, y = lat, group = group, fill = `1976`)) +
        geom_polygon(color = "black") + 
        theme_void() + 
        scale_fill_viridis(name = "Republican\nvotes (%)")

Serial Data county example

serial <- read_csv(paste0("https://raw.githubusercontent.com/",
                          "dgrtwo/serial-ggvis/master/input_data/",
                          "serial_podcast_data/serial_map_data.csv"))

serial <- serial %>%
        mutate(long = -76.8854 + 0.00017022 * x,
               lat  = 39.23822 + 1.371014e-04 * y,
               tower = Type == "cell-site")

maryland <- map_data('county', region = 'maryland')

baltimore <- maryland %>%
        filter(subregion %in% c("baltimore city", "baltimore"))

ggplot(baltimore, aes(x = long, y = lat, group = group)) + 
        geom_polygon(fill = "lightblue", color = "black") + 
        geom_point(data = serial, aes(group = NULL, color = tower)) + 
        theme_void() +
        scale_color_manual(name = "Cell tower", values = c("black", "red"))

Google Maps

beijing <- get_map("Beijing", zoom = 12)

ggmap(beijing) + 
        theme_void() + 
        ggtitle("Beijing, China")

Multiple maps

map_1 <- get_map("Estes Park", zoom = 12,
                 source = "google", maptype = "terrain") %>%
        ggmap(extent = "device")

map_2 <- get_map("Estes Park", zoom = 12,
                 source = "stamen", maptype = "watercolor") %>%
        ggmap(extent = "device")

map_3 <- get_map("Estes Park", zoom = 12,
                 source = "google", maptype = "hybrid") %>%
        ggmap(extent = "device")

grid.arrange(map_1, map_2, map_3, nrow = 1) 

Centered at a lat lon

get_map(c(2.35, 48.86), zoom = 10) %>%
        ggmap(extent = "device")

Geocode function lets you get the lat lon for any address

geocode("Supreme Court of the United States")
##   lon lat
## 1  NA  NA
geocode("1 First St NE, Washington, DC")
##         lon      lat
## 1 -77.00465 38.89051

Compute map distances

mapdist("Baltimore, MD",
        "1 First St NE, Washington, DC") %>%
        select(from, to, miles)
##            from                            to    miles
## 1 Baltimore, MD 1 First St NE, Washington, DC 37.41636

US States and Counties with Choroplethr

All counties

data(df_pop_county)

county_choropleth(df_pop_county)

Select states

county_choropleth(df_pop_county, state_zoom = c("colorado", "wyoming"))

Plotted over a reference map

county_choropleth(df_pop_county, state_zoom = c("north carolina"),
                  reference_map = TRUE)

Other data with FIPS county column mapping

floyd_events <- read_csv("./Advanced R/data/floyd_events.csv")
floyd_events %>% 
  group_by(fips) %>%
  dplyr::summarize(n_events = n()) %>%
  mutate(fips = as.numeric(fips)) %>%
  dplyr::rename(region = fips, 
         value = n_events) %>%
  county_choropleth(state_zoom = c("north carolina", "virginia"),
                    reference_map = TRUE)

Add on flood mapping and storm path

floyd_track <- read_csv("./Advanced R/data/floyd_track.csv")

floyd_events %>% 
  dplyr::group_by(fips) %>%
  dplyr::summarize(flood = sum(grepl("Flood", type))) %>%
  dplyr::mutate(fips = as.numeric(fips)) %>%
  dplyr::rename(region = fips, 
                value = flood) %>%
  county_choropleth(state_zoom = c("north carolina", "maryland", 
                                   "delaware", "new jersey",
                                   "virginia", "south carolina",
                                   "pennsylvania", "new york",
                                   "connecticut", "massachusetts",
                                   "new hampshire", "vermont",
                                   "maine", "rhode island"),
                    reference_map = TRUE) + 
  geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
                                    group = NA),
            color = "red")

Adding new counties

floyd_floods <- floyd_events %>% 
  filter(grepl("Flood", type)) %>%
  mutate(fips = as.numeric(fips)) %>%
  group_by(fips) %>%
  dplyr::summarize(value = 1) %>%
  ungroup() %>%
  dplyr::rename(region = fips) 

floyd_map <- CountyChoropleth$new(floyd_floods)

floyd_map$set_zoom(c("north carolina", "maryland", 
                     "delaware", "new jersey",
                     "virginia", "south carolina",
                     "pennsylvania", "new york",
                     "connecticut", "massachusetts",
                     "new hampshire", "vermont",
                     "maine", "rhode island"))

floyd_map$render()

floyd_map$add_state_outline <- TRUE
floyd_map$ggplot_scale <- scale_fill_manual(values = c("yellow"),
                                            guide = FALSE)
floyd_xlim <- floyd_map$get_bounding_box()[c(1, 3)]
floyd_ylim <- floyd_map$get_bounding_box()[c(2, 4)]

a <- floyd_map$render() + 
  geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
                                    group = NA),
            color = "red", size = 2, alpha = 0.6) + 
            xlim(floyd_map$get_bounding_box()[c(1, 3)]) + 
            ylim(floyd_map$get_bounding_box()[c(2, 4)])
            
b <- floyd_map$render_with_reference_map() + 
  geom_path(data = floyd_track, aes(x = -longitude, y = latitude,
                                    group = NA),
            color = "red", size = 2, alpha = 0.6) + 
            xlim(floyd_xlim) + 
            ylim(floyd_ylim)
            
grid.arrange(a, b, ncol = 2)

More advanced mapping - Spatial objects

tigris package to pull census tracts

denver_tracts <- tracts(state = "CO", county = 31, cb = TRUE)
plot(denver_tracts)

bbox will print out the bounding box of the spatial object (range of latitudes and longitudes covered by the data)

bbox(denver_tracts)
##          min        max
## x -105.10993 -104.60030
## y   39.61443   39.91425

Adding roads

roads <- primary_roads()
plot(denver_tracts, col = "lightblue")
plot(roads, add = TRUE, col = "darkred")

Converting from a spatial object to a dataframe

denver_tracts_df <- fortify(denver_tracts)
denver_tracts_df %>%
  dplyr::select(1:4) %>% dplyr::slice(1:5)
##        long      lat order  hole
## 1 -105.0532 39.78353     1 FALSE
## 2 -105.0502 39.78483     2 FALSE
## 3 -105.0477 39.78488     3 FALSE
## 4 -105.0439 39.78351     4 FALSE
## 5 -105.0436 39.78347     5 FALSE

Once its converted we can use it in ggplot2

denver_tracts_df %>%
  ggplot(aes(x = long, y = lat, group = group)) + 
  geom_polygon(fill = "lightblue", color = "black") + 
  theme_void()

The coord_map function in ggplot2 can help you in plotting maps with different projections.

usamap <- map_data("state") %>%
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")

map_1 <- usamap + coord_map() + ggtitle("default") 
map_2 <- usamap + coord_map("gilbert") + ggtitle("+ coord_map('gilbert')")
map_3 <- usamap + coord_map("conic", lat0 = 30) + 
  ggtitle("+ coord_map('conic', lat0 = 30)")

grid.arrange(map_1, map_2, map_3, ncol = 1)

R as GIS

load("./Advanced R/data/fars_colorado.RData")
driver_data %>% 
  dplyr::select(1:5) %>% dplyr::slice(1:5)
##   state st_case county                date latitude
## 1     8   80001     51 2001-01-01 10:00:00 39.10972
## 2     8   80002     31 2001-01-04 19:00:00 39.68215
## 3     8   80003     31 2001-01-03 07:00:00 39.63500
## 4     8   80004     31 2001-01-05 20:00:00 39.71304
## 5     8   80005     29 2001-01-05 10:00:00 39.09733
map_data("county", region = "Colorado") %>%
  ggplot(aes(x = long, y = lat, group = subregion)) + 
  geom_polygon(color = "gray", fill = NA) + 
  theme_void() + 
  geom_point(data = driver_data,
             aes(x = longitud, y = latitude, group = NULL),
             alpha = 0.5, size = 0.7) 

Summarize that by county

county_accidents <- driver_data %>%
  dplyr::mutate(county = str_pad(county, width = 3,
                          side = "left", pad = "0")) %>%
  tidyr::unite(region, state, county, sep = "") %>%
  dplyr::group_by(region) %>%
  dplyr::summarize(value = n()) %>%
  dplyr::mutate(region = as.numeric(region))
county_accidents %>% slice(1:4)
## # A tibble: 4 x 2
##   region value
##    <dbl> <int>
## 1   8001   617
## 2   8003    77
## 3   8005   522
## 4   8007    41
county_choropleth(county_accidents, state_zoom = "colorado")

Now, we want to count up how many of these accidents occurred in each of the census tract shapes we pulled from the US Census earlier in this section using the tigris package. We can do this using the poly.counts function from GISTools package. However, before we can use that function, we need to make sure both objects are spatial objects, rather than dataframes, and that they have the same CRS.

The census tract data is already in a spatial object. We can change put the denver_fars object in a spatial object by resetting its coordinates attribute to specify which of its columns show longitude and latitude. Next, we need to specify what CRS the data has. Because it is coming from a dataframe of longitude and latitude data, the WSG84 system should be reasonable:

denver_fars <- driver_data %>% 
  filter(county == 31)
denver_fars_sp <- denver_fars 
coordinates(denver_fars_sp) <- c("longitud", "latitude")
proj4string(denver_fars_sp) <- CRS("+init=epsg:4326")

To be able to pair up polygons and points, the spatial objects need to have the same CRS. To help later with calculating the area of each polygon, we’ll use a projected CRS that is reasonable for Colorado and reproject the spatial data using the spTransform function:

denver_tracts_proj <- spTransform(denver_tracts, 
                                  CRS("+init=epsg:26954"))
denver_fars_proj <- spTransform(denver_fars_sp, 
                                CRS(proj4string(denver_tracts_proj)))

Drill down to just Denver

denver_fars <- driver_data %>% 
  filter(county == 31)

denver_fars_sp <- denver_fars 
coordinates(denver_fars_sp) <- c("longitud", "latitude")
proj4string(denver_fars_sp) <- CRS("+init=epsg:4326")

denver_tracts_proj <- spTransform(denver_tracts, 
                                  CRS("+init=epsg:26954"))
denver_fars_proj <- spTransform(denver_fars_sp, 
                                CRS(proj4string(denver_tracts_proj)))

plot(denver_tracts_proj)
plot(denver_fars_proj, add = TRUE, col = "red", pch = 1)

Now count the accidents by census tract and remap

tract_counts <- poly.counts(denver_fars_proj, denver_tracts_proj)
choropleth(denver_tracts, tract_counts)

poly.areas calculates the area of each polygon so you can get teh rate of accidents per population in the Denver census tracts thus:

choropleth(denver_tracts, 
           tract_counts / poly.areas(denver_tracts_proj))

Raster data

When mapping in R, you may also need to map raster data. You can think of raster data as data shown with pixels- the graphing region is divided into even squares, and color is constant within each square.

There is a function in the raster package that allows you to “rasterize” data. That is, you take spatial points data, divide the region into squares, and count the number of points (or other summary) within each square. When you do this, you need to set the x- and y-range for the raster squares. You can use bbox on a spatial object to get an idea of its ranges to help you specify these limits. You can use the res parameter in raster to set how large the raster boxes should be. For example, here is some code for rasterizing the accident data for Denver:

bbox(denver_fars_sp)
##                 min       max
## longitud -105.10973 -104.0122
## latitude   39.61715   39.8381
denver_raster <- raster(xmn = -105.09, ymn = 39.60,
                        xmx = -104.71, ymx = 39.86,
                        res = 0.02)

den_acc_raster <- rasterize(geometry(denver_fars_sp),
                            denver_raster,
                            fun = "count")

image(den_acc_raster, col = terrain.colors(5))

We can just add that to the base polt of Denver

plot(denver_tracts)
plot(den_acc_raster, add = TRUE, alpha = 0.5)

Making a map in a map using base grid

balt_counties <- map_data("county", region = "maryland") %>%
  mutate(our_counties = subregion %in% c("baltimore", "baltimore city"))
balt_map <- get_map("Baltimore County", zoom = 10) %>%
  ggmap(extent = "device") + 
  geom_polygon(data = filter(balt_counties, our_counties == TRUE),
               aes(x = long, y = lat, group = group),
               fill = "red", color = "darkred", alpha = 0.2)

maryland_map <- balt_counties %>%
  ggplot(aes(x = long, y = lat, group = group, fill = our_counties)) + 
  geom_polygon(color = "black") + 
  scale_fill_manual(values = c("white", "darkred"), guide = FALSE) + 
  theme_void() + 
  coord_map()

grid.draw(ggplotGrob(balt_map))
md_inset <- viewport(x = 0, y = 0, 
                     just = c("left", "bottom"),
                     width = 0.35, height = 0.35)
pushViewport(md_inset)
grid.draw(rectGrob(gp = gpar(alpha = 0.5, col = "white")))
grid.draw(rectGrob(gp = gpar(fill = NA, col = "black")))
grid.draw(ggplotGrob(maryland_map))
popViewport()