Chapter 5 Part 2

Harold Nelson

2023-02-22

Setup

library(sf)

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(rgdal)

## Loading required package: sp

## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## 
## rgdal: version: 1.5-32, (SVN revision 1176)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.4.2, released 2022/03/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE 
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-7
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.

library(ggplot2)      
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)        
library(scales)       
library(RColorBrewer) 
library(units)

## udunits database from /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/units/share/udunits/udunits2.xml

library(cowplot)

Import and Restructure the Data

The following chunk includes all of the data manipulation we needed for Part 1, copied from the Wimberly text.

okcounty <- st_read("ok_counties.shp", quiet = TRUE)
tpoint <- st_read("ok_tornado_point.shp", quiet = TRUE)
tpath <- st_read("ok_tornado_path.shp", quiet = TRUE)
class(okcounty)

## [1] "sf"         "data.frame"

## [1] "sf"         "data.frame"
glimpse(okcounty)

## Rows: 77
## Columns: 8
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…

tpoint_16_21 <- tpoint %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)
tpath_16_21 <- tpath %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)

The Spatial Join

The following code joins the tornado dataframe with the county dataframe. This is a spatial join.

countypnt <- st_join(tpoint_16_21, okcounty)
str(countypnt)

## Classes 'sf' and 'data.frame':   434 obs. of  11 variables:
##  $ om      : num  613662 613675 613676 613677 613678 ...
##  $ yr      : num  2016 2016 2016 2016 2016 ...
##  $ date    : chr  "2016-03-23" "2016-03-30" "2016-03-30" "2016-03-30" ...
##  $ STATEFP : chr  "40" "40" "40" "40" ...
##  $ COUNTYFP: chr  "001" "113" "105" "131" ...
##  $ COUNTYNS: chr  "01101788" "01101844" "01101840" "01101853" ...
##  $ AFFGEOID: chr  "0500000US40001" "0500000US40113" "0500000US40105" "0500000US40131" ...
##  $ GEOID   : chr  "40001" "40113" "40105" "40131" ...
##  $ NAME    : chr  "Adair" "Osage" "Nowata" "Rogers" ...
##  $ LSAD    : chr  "06" "06" "06" "06" ...
##  $ geometry:sfc_POINT of length 434; first list element:  'XY' num  -94.5 35.7
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:10] "om" "yr" "date" "STATEFP" ...

Reverse

Try reversing the order of the arguments.

Solution

countypnt2 <- st_join(okcounty,tpoint_16_21)
str(countypnt2)

## Classes 'sf' and 'data.frame':   436 obs. of  11 variables:
##  $ STATEFP : chr  "40" "40" "40" "40" ...
##  $ COUNTYFP: chr  "077" "025" "025" "025" ...
##  $ COUNTYNS: chr  "01101826" "01101800" "01101800" "01101800" ...
##  $ AFFGEOID: chr  "0500000US40077" "0500000US40025" "0500000US40025" "0500000US40025" ...
##  $ GEOID   : chr  "40077" "40025" "40025" "40025" ...
##  $ NAME    : chr  "Latimer" "Cimarron" "Cimarron" "Cimarron" ...
##  $ LSAD    : chr  "06" "06" "06" "06" ...
##  $ om      : num  619091 613905 613906 615486 615487 ...
##  $ yr      : num  2020 2016 2016 2017 2017 ...
##  $ date    : chr  "2020-05-22" "2016-05-16" "2016-05-16" "2017-06-25" ...
##  $ geometry:sfc_POLYGON of length 436; first list element: List of 1
##   ..$ : num [1:11, 1:2] -95.5 -95.3 -95.3 -94.9 -94.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:10] "STATEFP" "COUNTYFP" "COUNTYNS" "AFFGEOID" ...

Difference

Why are these different? How are they different?

Solution

These dataframes are similar, but there are 2 more rows in the result when okcounty is the first argument. The version with tpoint_16_21 guarantees a row for every tornado. The second version guarantees a row for every county. By default, it is a left join.

See https://geocompr.robinlovelace.net/spatial-operations.html

Count

… the tornadoes in each county.

Just copy the code from the text.

Solution

countypnt <- st_drop_geometry(countypnt)
countysum <- countypnt %>%
  group_by(GEOID) %>%
  summarize(tcnt = n())
glimpse(countysum)

## Rows: 75
## Columns: 2
## $ GEOID <chr> "40001", "40005", "40007", "40009", "40011", "40013", "40015", "…
## $ tcnt  <int> 6, 3, 4, 8, 1, 4, 10, 5, 7, 5, 3, 12, 10, 5, 5, 1, 7, 9, 7, 8, 2…

Make a Map

Start by making a dataframe you could use to map the count or density of tornadoes in each county. Just copy the code from the text.

Solution

countymap <- okcounty %>%
  left_join(countysum, by = "GEOID") %>%
  replace(is.na(.), 0) %>%
  mutate(area = st_area(okcounty),
         tdens = 10^6 * 10^3 * tcnt / area) %>%
  drop_units()
glimpse(countymap)

## Rows: 77
## Columns: 11
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ tcnt     <dbl> 1, 12, 1, 10, 6, 2, 6, 0, 4, 9, 3, 10, 12, 1, 2, 8, 13, 4, 5,…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…
## $ area     <dbl> 1890663261, 4766283042, 2427121029, 1657249513, 1503893122, 3…
## $ tdens    <dbl> 0.5289149, 2.5176851, 0.4120108, 6.0340944, 3.9896452, 0.6197…

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens,color = "red")) +
  theme_void()

I added, color = “red” in the aes to make the boundaries clear.

Questions

What is the “.” in replace(is.na(.),0) for?

How do you use units in R?

Answers

Ask ChatGPT.

Centroids

Make a map using centroids. Show the count of Tornadoes by the size of the centroid.

Solution

okcntrd = st_centroid(countymap)

## Warning in st_centroid.sf(countymap): st_centroid assumes attributes are
## constant over geometries of x

ggplot() +
  geom_sf(data = okcntrd, aes(size = tcnt)) +
  geom_sf(data = okcounty, fill = NA) +
  theme_void()

Map Density as a Choropleth

Just copy the code from the text.

Solution

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = expression("Tornadoes/1000 km"^2), 
                       palette = "YlOrRd", 
                       breaks = pretty_breaks(),
                       direction = 1) +
  theme_void() +
  theme(legend.position = "bottom")

Questions

Suppose in the scale_fill_distiller() call, we left out “expression”.

Answer

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = "Tornadoes/1000 km^2", 
                       palette = "YlOrRd", 
                       breaks = pretty_breaks(),
                       direction = 1) +
  theme_void() +
  theme(legend.position = "bottom")

Other Color Schemes

Try some alternatives for the palette.

Solution

Here’s an example with RdPU.

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = expression("Tornadoes/1000 km"^2), 
                       palette = "RdPu", 
                       breaks = pretty_breaks(),
                       direction = 1) +
  theme_void() +
  theme(legend.position = "bottom")

Breaks

What happens if we drop breaks = …?

Solution

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = expression("Tornadoes/1000 km"^2), 
                       palette = "YlOrRd", 
                       direction = 1) +
  theme_void() +
  theme(legend.position = "bottom")

I hardly notice any difference.

Direction

What happens if we drop direction = 1?

Answer

ggplot(data = countymap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = expression("Tornadoes/1000 km"^2), 
                       palette = "YlOrRd", 
                       breaks = pretty_breaks()
                       ) +
  theme_void() +
  theme(legend.position = "bottom")

Dark becomes low and light becomes high.