Get some Spatial polygons.
(I’m ignoring simple features, for now).
library(USAboundaries)
# State boundaries from USAboundaries. `us` is the source for both the
# per-vertex coordinate table (via fortify) and the per-state attribute
# table (via as.data.frame) built below.
us <- us_states()
`fortify()` is the prototypical way to tidy these complex data.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(ggplot2)
# Flatten the polygons into a coordinate table (one row per vertex),
# store it as a tibble, and print it.
tab <- as_tibble(fortify(us))
tab
## Regions defined for each Polygons
## # A tibble: 13,785 × 7
## long lat order hole piece id group
## <dbl> <dbl> <int> <lgl> <fctr> <chr> <fctr>
## 1 -124.3870 40.50495 1 FALSE 1 0 0.1
## 2 -124.3014 40.65964 2 FALSE 1 0 0.1
## 3 -124.1767 40.84362 3 FALSE 1 0 0.1
## 4 -124.1181 40.98926 4 FALSE 1 0 0.1
## 5 -124.1254 41.04850 5 FALSE 1 0 0.1
## 6 -124.1545 41.08716 6 FALSE 1 0 0.1
## 7 -124.1640 41.13867 7 FALSE 1 0 0.1
## 8 -124.1227 41.18973 8 FALSE 1 0 0.1
## 9 -124.0923 41.28769 9 FALSE 1 0 0.1
## 10 -124.0631 41.43958 10 FALSE 1 0 0.1
## # ... with 13,775 more rows
But the metadata are not in this table, which is good, because including them would be very wasteful: imagine copying those columns onto each of the thousands of coordinates.
We also add a character version of the row number in zero-based form, as that’s what ggplot2 uses for id.
# Attribute table for `us` as a tibble, plus an `id` key: the zero-based
# row number stored as character, so it can be matched against the `id`
# column of the fortified coordinates.
mapdata <- us %>%
  as.data.frame() %>%
  as_tibble() %>%
  mutate(id = as.character(row_number() - 1))
mapdata
## # A tibble: 52 × 10
## statefp statens affgeoid geoid stusps name lsad
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 06 01779778 0400000US06 06 CA California 00
## 2 11 01702382 0400000US11 11 DC District of Columbia 00
## 3 12 00294478 0400000US12 12 FL Florida 00
## 4 13 01705317 0400000US13 13 GA Georgia 00
## 5 16 01779783 0400000US16 16 ID Idaho 00
## 6 17 01779784 0400000US17 17 IL Illinois 00
## 7 19 01779785 0400000US19 19 IA Iowa 00
## 8 21 01779786 0400000US21 21 KY Kentucky 00
## 9 22 01629543 0400000US22 22 LA Louisiana 00
## 10 24 01714934 0400000US24 24 MD Maryland 00
## # ... with 42 more rows, and 3 more variables: aland <chr>, awater <chr>,
## # id <chr>
Let’s nest the geometry, bind it back to the metadata, and patch together a simple S3 class for storing polygons.
# Nest the coordinates twice: first within each (id, group) pair, then
# within each id, leaving one row per id. Then attach the attribute
# columns from `mapdata`; the join key is left implicit, so dplyr reports
# the shared column it joins on.
usa <- tab %>%
  group_by(id, group) %>%
  nest() %>%
  group_by(id) %>%
  nest() %>%
  inner_join(mapdata)
usa
## Joining, by = "id"
## # A tibble: 52 × 11
## id data statefp statens affgeoid geoid stusps
## <chr> <list> <chr> <chr> <chr> <chr> <chr>
## 1 0 <tibble [6 × 2]> 06 01779778 0400000US06 06 CA
## 2 1 <tibble [1 × 2]> 11 01702382 0400000US11 11 DC
## 3 2 <tibble [4 × 2]> 12 00294478 0400000US12 12 FL
## 4 3 <tibble [1 × 2]> 13 01705317 0400000US13 13 GA
## 5 4 <tibble [1 × 2]> 16 01779783 0400000US16 16 ID
## 6 5 <tibble [1 × 2]> 17 01779784 0400000US17 17 IL
## 7 6 <tibble [1 × 2]> 19 01779785 0400000US19 19 IA
## 8 7 <tibble [1 × 2]> 21 01779786 0400000US21 21 KY
## 9 8 <tibble [1 × 2]> 22 01629543 0400000US22 22 LA
## 10 9 <tibble [2 × 2]> 24 01714934 0400000US24 24 MD
## # ... with 42 more rows, and 4 more variables: name <chr>, lsad <chr>,
## # aland <chr>, awater <chr>
This looks good: 52 rows of metadata with all the geometry embedded.
We can filter on metadata and unnest as needed.
# Preview the rows whose land area exceeds 1e10. `aland` is stored as
# character, so it is converted to numeric before the comparison.
usa %>%
  filter(as.numeric(aland) > 1e10) %>%
  select(awater, data, name)
## # A tibble: 48 × 3
## awater data name
## <chr> <list> <chr>
## 1 20483271881 <tibble [6 × 2]> California
## 2 31407883551 <tibble [4 × 2]> Florida
## 3 4947080103 <tibble [1 × 2]> Georgia
## 4 2397728105 <tibble [1 × 2]> Idaho
## 5 6200927458 <tibble [1 × 2]> Illinois
## 6 1076856589 <tibble [1 × 2]> Iowa
## 7 2393338940 <tibble [1 × 2]> Kentucky
## 8 23750204105 <tibble [1 × 2]> Louisiana
## 9 6983455225 <tibble [2 × 2]> Maryland
## 10 104031344385 <tibble [6 × 2]> Michigan
## # ... with 38 more rows
# Select the rows with land area above 1e10 and expand the nested geometry
# back to one row per vertex (two unnest() calls, one per nesting level).
#
# `aland` is a character column, so it must be converted before comparing.
# `aland > 1e10` would coerce the number to a string and compare the two
# lexically, which selects the wrong rows and disagrees with the
# filter(as.numeric(aland) > 1e10) preview earlier in this file.
mapit <- usa %>%
  filter(as.numeric(aland) > 1e10) %>%
  select(awater, data, name) %>%
  unnest() %>%
  unnest()

# Draw each polygon piece (`group`) filled by water area; `awater` is also
# stored as character, so convert it to get a continuous fill scale.
ggplot(mapit) +
  aes(x = long, y = lat, fill = as.numeric(awater), group = group) +
  geom_polygon()