This analysis is donw using:
All code and key outputs (figures, tables) are presented.
For each section, a short interpretations is given focusing on:
Healy’s chapter showed how to build choropleth maps using
maps::map_data("state") and the
socviz::election data.
Using the 2016 U.S. Presidential state-level election results, let’s produce a state-level choropleth for the winning candidate, with a meaningful title and legend.
The socviz package contains the election
dataset.
# Loading election data
data("election")
glimpse(election)
## Rows: 51
## Columns: 22
## $ state <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California",…
## $ st <chr> "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL…
## $ fips <dbl> 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, …
## $ total_vote <dbl> 2123372, 318608, 2604657, 1130635, 14237893, 2780247, 164…
## $ vote_margin <dbl> 588708, 46933, 91234, 304378, 4269978, 136386, 224357, 50…
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Clinton", "Clinton",…
## $ party <chr> "Republican", "Republican", "Republican", "Republican", "…
## $ pct_margin <dbl> 0.2773, 0.1473, 0.0350, 0.2692, 0.2999, 0.0491, 0.1364, 0…
## $ r_points <dbl> 27.72, 14.73, 3.50, 26.92, -29.99, -4.91, -13.64, -11.38,…
## $ d_points <dbl> -27.72, -14.73, -3.50, -26.92, 29.99, 4.91, 13.64, 11.38,…
## $ pct_clinton <dbl> 34.36, 36.55, 44.58, 33.65, 61.48, 48.16, 54.57, 53.09, 9…
## $ pct_trump <dbl> 62.08, 51.28, 48.08, 60.57, 31.49, 43.25, 40.93, 41.71, 4…
## $ pct_johnson <dbl> 2.09, 5.88, 4.08, 2.64, 3.36, 5.18, 2.96, 3.33, 1.58, 2.1…
## $ pct_other <dbl> 1.46, 6.29, 3.25, 3.13, 3.66, 3.41, 1.55, 1.88, 3.47, 1.8…
## $ clinton_vote <dbl> 729547, 116454, 1161167, 380494, 8753792, 1338870, 897572…
## $ trump_vote <dbl> 1318255, 163387, 1252401, 684872, 4483814, 1202484, 67321…
## $ johnson_vote <dbl> 44467, 18725, 106327, 29829, 478500, 144121, 48676, 14757…
## $ other_vote <dbl> 31103, 20042, 84762, 35440, 521787, 94772, 25457, 8327, 1…
## $ ev_dem <dbl> 0, 0, 0, 0, 55, 9, 7, 3, 3, 0, 0, 3, 0, 20, 0, 0, 0, 0, 0…
## $ ev_rep <dbl> 9, 3, 11, 6, 0, 0, 0, 0, 0, 29, 16, 0, 4, 0, 11, 6, 6, 8,…
## $ ev_oth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ census <chr> "South", "West", "West", "South", "West", "West", "Northe…
# Loading state map data
state_map <- map_data("state")
glimpse(state_map)
## Rows: 15,537
## Columns: 6
## $ long <dbl> -87.46201, -87.48493, -87.52503, -87.53076, -87.57087, -87.5…
## $ lat <dbl> 30.38968, 30.37249, 30.37249, 30.33239, 30.32665, 30.32665, …
## $ group <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ region <chr> "alabama", "alabama", "alabama", "alabama", "alabama", "alab…
## $ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Preparing election data
election_state <- election %>%
mutate(
region = tolower(state) # to join with state map data
)
# Joining map and election data
state_choropleth <- state_map %>%
left_join(election_state, by = "region")
# Creating the choropleth map
ggplot(state_choropleth, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = party, alpha = pct_margin), color = "white", linewidth = 0.2) +
coord_quickmap() +
scale_fill_manual(
values = c("Democratic" = "#2166AC", "Republican" = "#B2182B"),
name = "Winner"
) +
scale_alpha_continuous(
range = c(0.3, 1),
limits = c(0, 1),
name = "Winning Margin\n(Color opacity)",
labels = scales::percent_format(accuracy = 1)
) +
labs(
title = "2016 Presidential Election Results by State"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
Making two versions of the choropleth:
pct_margin = 0 (e.g., blue for Democratic wins, red for
Republican wins, with neutral near zero).# Preparing election data
state_choropleth <- state_choropleth %>%
mutate(
signed_margin = ifelse(party == "Democratic", -pct_margin, pct_margin) # since pct_margin is absolute value
)
# 1. Creating choropleth map with diverging color scale
ggplot(state_choropleth, aes(x = long, y = lat, group = group, fill = signed_margin)) +
geom_polygon(color = "white", linewidth = 0.2) +
coord_quickmap() +
scale_fill_gradient2(
low = "#2166AC", # Blue for Democratic wins
mid = "#F0F0F0", # Light Grey for close races
high = "#B2182B", # Red for Republican wins
midpoint = 0,
limits = c(-1, 1),
name = "Winning\nMargin",
labels = scales::percent_format(accuracy = 1)
) +
labs(
title = "2016 Presidential Election Results by State",
subtitle = "Red = Republican, Blue = Democratic"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 9),
legend.position = "right",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
# 2. Creating choropleth map with sequential color scale
ggplot(state_choropleth, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(alpha = pct_margin), color = "white", linewidth = 0.2) +
coord_quickmap() +
scale_alpha_continuous(
range = c(0.3, 1),
limits = c(0, 1),
name = "Winning\nMargin",
labels = scales::percent_format(accuracy = 1)
) +
labs(
title = "2016 Presidential Election Results by State"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
The diverging color scale is better for seeing who won because blue states clearly show Democratic wins and red states show Republican wins. The colors directly indicate the winner without requiring additional interpretation.
The sequential color scale is better for comparing competitiveness because it focuses only on margin size regardless of party. Light colors indicate close races and dark colors indicate landslide victories, making swing states and safe states easy to distinguish.
Choropleths exaggerate importance because they show geographic area, not population or votes. Big empty states like Wyoming and Montana take up huge visual space despite having tiny populations and few electoral votes, while small dense states like New Jersey barely show up on the map even though they have way more people and political weight. The map tricks the eye into thinking land area equals importance.
As Wilke and Healy emphasized, choropleth maps encode data by geographic area rather than by the quantity being measured. In elections, what matters is votes and electoral power, not land area, yet our eyes are drawn to spatial extent.
An alternative - A cartogram heatmap (tile grid map) would effectively complement this choropleth by representing each state as an equal-sized square arranged in approximate geographic layout, ensuring Wyoming receives the same visual weight as California. This could be implemented using the statebins package in R or geom_tile(), where each state occupies uniform space and readers can accurately compare states without geographic area bias dominating visual interpretation.
sf and projectionsWilke emphasizes the importance of projections and
choropleth mapping. In this section, let’s explore
different projections using sf object of world
countries.
The spData package provides world as an
sf object.
if (!require("spDataLarge", quietly = TRUE)) install.packages("spDataLarge", repos='https://nowosad.github.io/drat/', type='source')
library(spData)
data("world", package = "spData")
glimpse(world)
## Rows: 177
## Columns: 11
## $ iso_a2 <chr> "FJ", "TZ", "EH", "CA", "US", "KZ", "UZ", "PG", "ID", "AR", …
## $ name_long <chr> "Fiji", "Tanzania", "Western Sahara", "Canada", "United Stat…
## $ continent <chr> "Oceania", "Africa", "Africa", "North America", "North Ameri…
## $ region_un <chr> "Oceania", "Africa", "Africa", "Americas", "Americas", "Asia…
## $ subregion <chr> "Melanesia", "Eastern Africa", "Northern Africa", "Northern …
## $ type <chr> "Sovereign country", "Sovereign country", "Indeterminate", "…
## $ area_km2 <dbl> 19289.97, 932745.79, 96270.60, 10036042.98, 9510743.74, 2729…
## $ pop <dbl> 885806, 52234869, NA, 35535348, 318622525, 17288285, 3075770…
## $ lifeExp <dbl> 69.96000, 64.16300, NA, 81.95305, 78.84146, 71.62000, 71.039…
## $ gdpPercap <dbl> 8222.2538, 2402.0994, NA, 43079.1425, 51921.9846, 23587.3375…
## $ geom <MULTIPOLYGON [°]> MULTIPOLYGON (((-180 -16.55..., MULTIPOLYGON ((…
If spData is not available, there are other
sf-based world dataset (e.g.,
rnaturalearth::ne_countries() plus
st_as_sf()).
sfLet’s explore life expectancy (lifeExp from
world) and see how it varies across countries.
# producing a choropleth world
ggplot(world) +
geom_sf(aes(fill = lifeExp)) +
coord_sf() +
theme_minimal()
ggplot(world) +
geom_sf(aes(fill = lifeExp), color = "white", size = 0.1) +
coord_sf() +
scale_fill_viridis_c(
option = "plasma",
name = "Life Expectancy\n(years)",
na.value = "grey80"
) +
labs(
title = "Global Life Expectancy by Country"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "bottom",
legend.key.width = unit(2, "cm")
)
The map shows how life expectancy varies across countries, with clear differences between regions. Higher values appear in much of Europe, Australia, North America and parts of East Asia like Japan, while lower life expectancy is concentrated in many African countries.
Using st_transform(), let’s present the same
data under at least two different projected
CRSs, for example:
if (!require("patchwork", quietly = TRUE)) install.packages("patchwork")
library(patchwork)
# Web Mercator projection
world_mercator <- st_transform(world, crs = "EPSG:3857")
# Mollweide equal-area projection
world_mollweide <- st_transform(world, crs = "ESRI:54009")
# Web Mercator projection
p_mercator <- ggplot(world_mercator) +
geom_sf(aes(fill = lifeExp), color = "white", size = 0.1) +
scale_fill_viridis_c(
option = "plasma",
name = "Life Expectancy\n(years)",
na.value = "grey80"
) +
labs(
title = "Web Mercator Projection"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
legend.position = "none"
)
# Mollweide equal-area projection
p_mollweide <- ggplot(world_mollweide) +
geom_sf(aes(fill = lifeExp), color = "white", size = 0.1) +
scale_fill_viridis_c(
option = "plasma",
name = "Life\nExpectancy\n(years)",
na.value = "grey80"
) +
labs(
title = "Mollweide Equal-Area Projection"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
legend.position = "left",
legend.key.width = unit(0.5, "cm")
)
# Displaying side by side
(p_mercator | p_mollweide) +
plot_layout(widths = c(10, 11))
Map projections change how regions look on a map. Some, like Web Mercator, keep shapes correct locally but make areas near the poles look huge, so Greenland and Antarctica appear much bigger than they really are. Others, like Mollweide, preserve the true area of regions but distort shapes, stretching edges and changing how countries look.
For showing life expectancy, an area-preserving projection like Mollweide is better because it accurately represents the size of each country, which matters when comparing totals or averages. Shape is less important here than showing the correct relative size of populations. This follows Wilke’s idea that for data tied to area or population, equal-area maps are more appropriate than shape-preserving ones.
socviz and design
choicesHealy’s book uses county-level maps to illustrate how small spatial units can make choropleths visually complex.
The socviz package includes:
county_map — geometry for U.S. counties,county_data — attributes (e.g., demographic and
economic variables).data("county_map", "county_data", package = "socviz")
glimpse(county_map)
## Rows: 191,382
## Columns: 7
## $ long <dbl> 1225889, 1235324, 1244873, 1244129, 1272010, 1276797, 1273832, 1…
## $ lat <dbl> -1275020, -1274008, -1272331, -1267515, -1262889, -1295514, -129…
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ group <fct> 0500000US01001.1, 0500000US01001.1, 0500000US01001.1, 0500000US0…
## $ id <chr> "01001", "01001", "01001", "01001", "01001", "01001", "01001", "…
glimpse(county_data)
## Rows: 3,195
## Columns: 32
## $ id <chr> "0", "01000", "01001", "01003", "01005", "01007", "01…
## $ name <chr> NA, "1", "Autauga County", "Baldwin County", "Barbour…
## $ state <fct> NA, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A…
## $ census_region <fct> NA, South, South, South, South, South, South, South, …
## $ pop_dens <fct> "[ 50, 100)", "[ 50, 100)", "[ 50, 100)", "[…
## $ pop_dens4 <fct> "[ 45, 118)", "[ 45, 118)", "[ 45, 118)", "[118,71…
## $ pop_dens6 <fct> "[ 82, 215)", "[ 82, 215)", "[ 82, 215)", "[ 82, …
## $ pct_black <fct> "[10.0,15.0)", "[25.0,50.0)", "[15.0,25.0)", "[ 5.0,1…
## $ pop <int> 318857056, 4849377, 55395, 200111, 26887, 22506, 5771…
## $ female <dbl> 50.8, 51.5, 51.5, 51.2, 46.5, 46.0, 50.6, 45.2, 53.4,…
## $ white <dbl> 77.7, 69.8, 78.1, 87.3, 50.2, 76.3, 96.0, 27.2, 54.3,…
## $ black <dbl> 13.2, 26.6, 18.4, 9.5, 47.6, 22.1, 1.8, 69.9, 43.6, 2…
## $ travel_time <dbl> 25.5, 24.2, 26.2, 25.9, 24.6, 27.6, 33.9, 26.9, 24.0,…
## $ land_area <dbl> 3531905.43, 50645.33, 594.44, 1589.78, 884.88, 622.58…
## $ hh_income <int> 53046, 43253, 53682, 50221, 32911, 36447, 44145, 3203…
## $ su_gun4 <fct> NA, NA, "[11,54]", "[11,54]", "[ 5, 8)", "[11,54]", "…
## $ su_gun6 <fct> NA, NA, "[10,12)", "[10,12)", "[ 7, 8)", "[10,12)", "…
## $ fips <dbl> 0, 1000, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 10…
## $ votes_dem_2016 <int> NA, NA, 5908, 18409, 4848, 1874, 2150, 3530, 3716, 13…
## $ votes_gop_2016 <int> NA, NA, 18110, 72780, 5431, 6733, 22808, 1139, 4891, …
## $ total_votes_2016 <int> NA, NA, 24661, 94090, 10390, 8748, 25384, 4701, 8685,…
## $ per_dem_2016 <dbl> NA, NA, 0.23956855, 0.19565310, 0.46660250, 0.2142203…
## $ per_gop_2016 <dbl> NA, NA, 0.7343579, 0.7735147, 0.5227141, 0.7696616, 0…
## $ diff_2016 <int> NA, NA, 12202, 54371, 583, 4859, 20658, 2391, 1175, 1…
## $ per_dem_2012 <dbl> NA, NA, 0.2657577, 0.2156657, 0.5125229, 0.2621857, 0…
## $ per_gop_2012 <dbl> NA, NA, 0.7263374, 0.7738975, 0.4833755, 0.7306638, 0…
## $ diff_2012 <int> NA, NA, 11012, 47443, 334, 3931, 17780, 2808, 714, 14…
## $ winner <chr> NA, NA, "Trump", "Trump", "Trump", "Trump", "Trump", …
## $ partywinner16 <chr> NA, NA, "Republican", "Republican", "Republican", "Re…
## $ winner12 <chr> NA, NA, "Romney", "Romney", "Obama", "Romney", "Romne…
## $ partywinner12 <chr> NA, NA, "Republican", "Republican", "Democrat", "Repu…
## $ flipped <chr> NA, NA, "No", "No", "Yes", "No", "No", "No", "No", "N…
Joined county_map with county_data
using the common ID.
Used household income (hh_income from
county_data) to map that is not about
elections.
Created a county-level choropleth using ggplot() +
geom_polygon() or geom_map() and an
appropriate color scale. Used coord_quickmap() to stabilize
aspect ratio.
Let’s produce the map and clearly label household income in the title and legend.
# Joining county map and data
county_full <- county_map %>%
left_join(county_data, by = "id")
# Creating national county-level choropleth for median household income
ggplot(county_full, aes(x = long, y = lat, group = group, fill = hh_income)) +
geom_polygon() + # No borders for cleaner appearance
coord_quickmap() +
scale_fill_viridis_c(
option = "magma",
name = "Median Household\nIncome ($)",
labels = scales::dollar_format(scale = 0.001, suffix = "K"),
na.value = "grey90"
) +
labs(
title = "Median Household Income by County, United States"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
Using the joined data, let’s filter to South region and create a zoomed-in county-level map.
# Filtering for South census region
county_south <- county_full %>%
filter(census_region == "South")
# Creating regional map for South
ggplot(county_south, aes(x = long, y = lat, group = group, fill = hh_income)) +
geom_polygon() +
coord_quickmap() +
scale_fill_viridis_c(
option = "magma",
name = "Median Household\nIncome ($)",
labels = scales::dollar_format(scale = 0.001, suffix = "K"),
na.value = "grey90"
) +
labs(
title = "Median Household Income: South Region"
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right",
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
The regional map makes it easier to spot local income patterns such as wealthy clusters around major metro areas like Northern Virginia, Atlanta, and parts of Texas, and clear pockets of poverty in the Mississippi Delta and Appalachia. County-level differences also stand out, showing sharp contrasts between prosperous suburban counties and lower-income rural areas.
The national map can mislead by making large, sparsely populated Western counties look more important than they are, simply because they take up more space. It can also overwhelm readers with thousands of tiny counties, causing real patterns to blur and giving a false sense of where most people actually live or experience poverty.
A dot plot where each dot represents a county would show median household income better than the county map. The x-axis would display household income amounts, the y-axis could list states or regions, and different dot colors would show which census region each county belongs to (Northeast, South, Midwest, West). The dots could be made larger with county names labeled inside each dot so specific counties can be identified easily. This approach treats every county the same way visually, so tiny Rhode Island counties get the same dot size as huge Alaska counties, removing geographic area bias entirely.
sf around the White
HouseIn this section, let’s manually select an area
around the White House in Washington, DC, download OpenStreetMap data as
GeoJSON, load it into R with sf and map
some features.
In Overpass Turbo (or a similar OSM interface):
geojason.In R, read the GeoJSON file as an sf
object.
# Loading GeoJSON file
wh_features <- read_sf("white_house_area.geojson")
# Geometry types present
table(st_geometry_type(wh_features))
##
## GEOMETRY POINT LINESTRING POLYGON
## 0 25 11 2065
## MULTIPOINT MULTILINESTRING MULTIPOLYGON GEOMETRYCOLLECTION
## 0 0 8 0
## CIRCULARSTRING COMPOUNDCURVE CURVEPOLYGON MULTICURVE
## 0 0 0 0
## MULTISURFACE CURVE SURFACE POLYHEDRALSURFACE
## 0 0 0 0
## TIN TRIANGLE
## 0 0
# Attributes present
str(wh_features)
## sf [2,109 × 299] (S3: sf/tbl_df/tbl/data.frame)
## $ id : chr [1:2109] "relation/286293" "relation/380759" "relation/380760" "relation/380769" ...
## $ @id : chr [1:2109] "relation/286293" "relation/380759" "relation/380760" "relation/380769" ...
## $ access : chr [1:2109] NA NA NA NA ...
## $ addr:city : chr [1:2109] "Washington" "Washington" "Washington" NA ...
## $ addr:country : chr [1:2109] NA NA NA NA ...
## $ addr:district : chr [1:2109] NA NA NA NA ...
## $ addr:housename : chr [1:2109] "Treasury Building" NA NA NA ...
## $ addr:housenumber : chr [1:2109] "1500" "1000" NA NA ...
## $ addr:place : chr [1:2109] NA NA NA NA ...
## $ addr:postcode : chr [1:2109] NA "20585" "20250" NA ...
## $ addr:state : chr [1:2109] "DC" NA "DC" NA ...
## $ addr:street : chr [1:2109] "Pennsylvania Avenue Northwest" "Independence Avenue Southwest" "Independence Avenue Southwest" NA ...
## $ addr:unit : chr [1:2109] NA NA NA NA ...
## $ air_conditioning : chr [1:2109] NA NA NA NA ...
## $ alt_name : chr [1:2109] NA NA NA NA ...
## $ alt_name:es : chr [1:2109] NA NA NA NA ...
## $ alt_name:hr : chr [1:2109] NA NA NA NA ...
## $ alt_name:vi : chr [1:2109] NA NA NA NA ...
## $ alt_name_1 : chr [1:2109] NA NA NA NA ...
## $ alt_name_1:es : chr [1:2109] NA NA NA NA ...
## $ alt_name_2 : chr [1:2109] NA NA NA NA ...
## $ alt_name_3 : chr [1:2109] NA NA NA NA ...
## $ alt_name_4 : chr [1:2109] NA NA NA NA ...
## $ amenity : chr [1:2109] NA NA NA NA ...
## $ architect : chr [1:2109] NA NA "Louis A. Simon" NA ...
## $ architect:wikidata : chr [1:2109] NA NA "Q6686579" NA ...
## $ architect:wikipedia : chr [1:2109] NA NA NA NA ...
## $ artist_name : chr [1:2109] NA NA NA NA ...
## $ artwork_type : chr [1:2109] NA NA NA NA ...
## $ atm : chr [1:2109] NA NA NA NA ...
## $ attraction : chr [1:2109] NA NA NA NA ...
## $ bar : chr [1:2109] NA NA NA NA ...
## $ barrier : chr [1:2109] NA NA NA NA ...
## $ bench : chr [1:2109] NA NA NA NA ...
## $ bin : chr [1:2109] NA NA NA NA ...
## $ books : chr [1:2109] NA NA NA NA ...
## $ branch : chr [1:2109] NA NA NA NA ...
## $ brand : chr [1:2109] NA NA NA NA ...
## $ brand:short : chr [1:2109] NA NA NA NA ...
## $ brand:website : chr [1:2109] NA NA NA NA ...
## $ brand:wikidata : chr [1:2109] NA NA NA NA ...
## $ brewery : chr [1:2109] NA NA NA NA ...
## $ bridge:structure : chr [1:2109] NA NA NA NA ...
## $ building : chr [1:2109] "government" "public" "government" "yes" ...
## $ building:colour : chr [1:2109] NA NA NA NA ...
## $ building:facade:material : chr [1:2109] NA NA NA NA ...
## $ building:flats : chr [1:2109] NA NA NA NA ...
## $ building:levels : chr [1:2109] "5" "8" "6" "15" ...
## $ building:levels:aboveground : chr [1:2109] NA NA NA NA ...
## $ building:levels:underground : chr [1:2109] NA NA "2" NA ...
## $ building:material : chr [1:2109] NA NA NA NA ...
## $ building:min_level : chr [1:2109] NA NA NA NA ...
## $ building:name : chr [1:2109] NA NA NA NA ...
## $ building:part : chr [1:2109] NA NA NA NA ...
## $ building:wikidata : chr [1:2109] NA NA NA NA ...
## $ building:wikimedia_commons : chr [1:2109] NA NA NA NA ...
## $ building:wikipedia : chr [1:2109] NA NA NA NA ...
## $ capturedate : chr [1:2109] NA NA NA NA ...
## $ captureyear : chr [1:2109] NA NA NA NA ...
## $ charge : chr [1:2109] NA NA NA NA ...
## $ charge:conditional : chr [1:2109] NA NA NA NA ...
## $ check_date : Date[1:2109], format: NA NA ...
## $ check_date:internet_access : Date[1:2109], format: NA NA ...
## $ check_date:opening_hours : Date[1:2109], format: NA NA ...
## $ club : chr [1:2109] NA NA NA NA ...
## $ construction : chr [1:2109] NA NA NA NA ...
## $ contact:email : chr [1:2109] NA NA NA NA ...
## $ contact:facebook : chr [1:2109] NA NA NA NA ...
## $ contact:foursquare : chr [1:2109] NA NA "https://www.foursquare.com/v/4bc8e17e3740b713ae995d65" NA ...
## $ contact:instagram : chr [1:2109] NA NA NA NA ...
## $ contact:phone : chr [1:2109] NA NA NA NA ...
## $ contact:twitter : chr [1:2109] NA NA NA NA ...
## $ contact:website : chr [1:2109] NA NA NA NA ...
## $ contact:youtube : chr [1:2109] NA NA NA NA ...
## $ country : chr [1:2109] NA NA NA NA ...
## $ cuisine : chr [1:2109] NA NA NA NA ...
## $ dataset : chr [1:2109] NA NA NA NA ...
## $ dcgis:address : chr [1:2109] "1500 Pennsylvania Ave, NW" NA NA NA ...
## $ dcgis:aid : chr [1:2109] "297687" NA NA NA ...
## $ dcgis:area : chr [1:2109] "10268.602225" NA NA NA ...
## $ dcgis:capturedate : Date[1:2109], format: NA NA ...
## $ dcgis:captureyear : chr [1:2109] NA "20080530" NA "19990331" ...
## $ dcgis:dataset : chr [1:2109] NA "buildings" NA "buildings" ...
## $ dcgis:facuse : chr [1:2109] NA NA NA NA ...
## $ dcgis:featurecode : chr [1:2109] NA "2000" NA "2000" ...
## $ dcgis:gis : chr [1:2109] "HSE_0002" NA NA NA ...
## $ dcgis:gis_id : chr [1:2109] NA "94809" NA NA ...
## $ dcgis:label : chr [1:2109] "Treasury Department" NA NA NA ...
## $ dcgis:length : chr [1:2109] "958.41899353" NA NA NA ...
## $ dcgis:list_info : chr [1:2109] "Reg" NA NA NA ...
## $ dcgis:lot : chr [1:2109] NA NA NA NA ...
## $ dcgis:nr_eligibl : chr [1:2109] "0" NA NA NA ...
## $ dcgis:objectid : chr [1:2109] "561" NA NA NA ...
## $ dcgis:pubdate : Date[1:2109], format: NA NA ...
## $ dcgis:sqft : chr [1:2109] NA NA NA NA ...
## $ dcgis:square : chr [1:2109] NA NA NA NA ...
## $ dcgis:ssl : chr [1:2109] "0187S 0802" NA NA NA ...
## $ dcgis:update_date : chr [1:2109] "Wed Feb 01 00:00:00 UTC 2006" NA NA NA ...
## $ dcgis:url : chr [1:2109] "http://www.planning.dc.gov/planning/cwp/view,a,1284,q,570748,planningNav_GID,1706,planningNav,|33515|.asp" NA NA NA ...
## [list output truncated]
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:298] "id" "@id" "access" "addr:city" ...
The dataset contains three main geometry types: 2,065 POLYGONs (representing buildings and land areas), 25 POINTs (marking specific locations like monuments), and 11 LINESTRINGs (showing paths or roads). Interesting attributes for mapping include building (building types like government/public), historic and heritage (marking heritage sites), name (feature names), landuse (grass, parks), amenity and leisure (parks and public spaces), addr:street and addr:housenumber (addresses), start_date (construction year), and building:levels (number of floors). Additional useful columns include ref:nrhp (National Register of Historic Places numbers), wikipedia links, and government or office types for identifying federal buildings.
ggplot2 + sf map of selected
featuresUsing wh_features object:
Filtered the sf object to keep features like
buildings, government buildings and green spaces.
Created a static map using ggplot2 and
geom_sf():
# Filtering for different feature types
# All buildings for base layer
all_buildings <- wh_features %>%
filter(!is.na(building))
# Government buildings
govt_buildings <- wh_features %>%
filter(building == "government" | building == "public" | office == "government")
# Green spaces
greens <- wh_features %>%
filter(landuse == "grass" | !is.na(landuse))
# Creating static map
ggplot() +
# Base layer: all buildings (light tan)
geom_sf(data = all_buildings,
fill = "#D2B48C", color = "#8B7355",
alpha = 0.5, size = 0.3) +
# Green spaces layer (green)
geom_sf(data = greens,
fill = "#90EE90", color = "#228B22",
alpha = 0.7, size = 0.4) +
# Government buildings (red)
geom_sf(data = govt_buildings,
fill = "#DC143C", color = "#8B0000",
alpha = 0.8, size = 0.4) +
coord_sf() +
labs(
title = "Government Buildings Around the White House",
subtitle = "Red = Government buildings | Tan = Buildings | Green = Green spaces",
) +
theme_tufte() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
panel.grid = element_line(color = "grey90", size = 0.2),
axis.text = element_blank(),
axis.title = element_blank()
)
The bounding box covers a few blocks around the White House to show major government buildings like the Treasury and Agriculture departments without including too much clutter. The map uses three layers: tan for regular buildings as background, bright green for parks and grass areas, and red for government buildings to make them pop out. This setup makes government buildings easy to spot against the neutral background, with green spaces helping viewers orient themselves. The main drawback is that all buildings of the same type look identical regardless of their actual importance or size, and overlapping features can sometimes create visual confusion.
leaflet mapUsing leaflet to create an interactive map of
wh_features area.
# Transforming to WGS84 (leaflet expects long/lat)
wh_leaflet <- st_transform(wh_features, crs = "+proj=longlat +datum=WGS84")
# Creating filtered datasets for leaflet
greens_leaflet <- wh_leaflet %>%
filter(!is.na(landuse))
govt_leaflet <- wh_leaflet %>%
filter(building == "government" | building == "public" | office == "government")
all_buildings_leaflet <- wh_leaflet %>%
filter(!is.na(building))
# Creating popup content for government buildings
govt_popup <- govt_leaflet %>%
mutate(popup_text = paste0(
"<b>", ifelse(is.na(name), "Government Building", name), "</b><br>",
ifelse(!is.na(building), paste0("Building Type: ", building, "<br>"), ""),
ifelse(!is.na(office), paste0("Office Type: ", office, "<br>"), ""),
ifelse(!is.na(`addr:street`), paste0("Address: ",
ifelse(!is.na(`addr:housenumber`), paste0(`addr:housenumber`, " "), ""),
`addr:street`, "<br>"), ""),
ifelse(!is.na(`building:levels`), paste0("Levels: ", `building:levels`, "<br>"), ""),
ifelse(!is.na(`start_date`), paste0("Built: ", `start_date`, "<br>"), ""),
ifelse(!is.na(heritage), paste0("Heritage Status: ", heritage, "<br>"), ""),
ifelse(!is.na(`ref:nrhp`), paste0("NRHP #: ", `ref:nrhp`, "<br>"), ""),
ifelse(!is.na(wikipedia), paste0("<a href='https://en.wikipedia.org/wiki/",
gsub("en:", "", wikipedia), "' target='_blank'>Wikipedia</a>"), "")
))
# Creating popup content for green spaces
greens_popup <- greens_leaflet %>%
mutate(popup_text = paste0(
"<b>", ifelse(is.na(name), "Green Space", name), "</b><br>",
ifelse(!is.na(landuse), paste0("Land Use: ", landuse, "<br>"), ""),
ifelse(!is.na(leisure), paste0("Leisure Type: ", leisure, "<br>"), ""),
ifelse(!is.na(amenity), paste0("Amenity: ", amenity), "")
))
# Creating popup content for all buildings
buildings_popup <- all_buildings_leaflet %>%
mutate(popup_text = paste0(
"<b>", ifelse(is.na(name), "Building", name), "</b><br>",
ifelse(!is.na(building), paste0("Type: ", building, "<br>"), ""),
ifelse(!is.na(`addr:street`), paste0("Address: ",
ifelse(!is.na(`addr:housenumber`), paste0(`addr:housenumber`, " "), ""),
`addr:street`), "")
))
# Building interactive leaflet map
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
# All buildings layer (light gray base)
addPolygons(
data = buildings_popup,
fillColor = "#D2B48C",
fillOpacity = 0.5,
color = "#8B7355",
weight = 1,
popup = ~popup_text,
group = "All Buildings",
highlightOptions = highlightOptions(
color = "#666666",
weight = 2,
bringToFront = FALSE
)
) %>%
# Green spaces layer
addPolygons(
data = greens_popup,
fillColor = "#90EE90",
fillOpacity = 0.7,
color = "#228B22",
weight = 2,
popup = ~popup_text,
group = "Green Spaces",
highlightOptions = highlightOptions(
color = "yellow",
weight = 3,
bringToFront = TRUE
)
) %>%
# Government buildings layer (red)
addPolygons(
data = govt_popup,
fillColor = "#DC143C",
fillOpacity = 0.8,
color = "#8B0000",
weight = 2,
popup = ~popup_text,
group = "Government Buildings",
highlightOptions = highlightOptions(
color = "orange",
weight = 4,
bringToFront = TRUE
)
) %>%
# Layer controls
addLayersControl(
overlayGroups = c("All Buildings", "Green Spaces", "Government Buildings"),
options = layersControlOptions(collapsed = FALSE)
) %>%
# Legend
addLegend(
position = "bottomright",
colors = c("#D2B48C", "#90EE90", "#DC143C"),
labels = c("All Buildings", "Green Spaces", "Government Buildings"),
title = "Feature Types",
opacity = 0.8
) %>%
# Setting initial view
setView(lng = -77.032, lat = 38.890, zoom = 15) %>%
# White House marker
addMarkers(
lng = -77.0365,
lat = 38.8977)
The static ggplot2+sf map works best for publications, presentations, and printed reports because it delivers a clear, controlled visual message that everyone sees the same way. The interactive leaflet map is better for exploration and web applications where users need to zoom, toggle layers, click buildings for details like names and addresses, and investigate the area at their own pace. Static maps communicate one focused story efficiently, while interactive maps let users discover information themselves through hands-on exploration.