This document outlines the procedure to identify neighborhoods and define their boundaries for any given county in the US. Generally, this method works better for cities and is less helpful for rural areas or small towns. By this method, neighborhoods are comprised of one or several census tracts. The general method is as follows:
## Set working directory and clear memory
setwd("/Users/shannoncarter/Documents/JanuaryAdvisors/civicap")
rm(list = ls(all = T))
## Load required packages
require(tidyverse)
require(sf)
require(leaflet)
require(tigris)
require(htmlwidgets)
require(lwgeom)
require(raster)
require(rmapzen)
require(geojsonio)
## Set CRS to use for all spatial data
wgs84 <- st_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0")
You can do this part for one or many counties. Note though that later parts should be done one county at a time because they are computationally intensive. For a non-county starting point, download shapefile of whichever boundary (e.g.,city) you choose and st_join with census tracts to include only those census tracts in that boundary.
### COUNTY BOUNDARIES ###
# use tigris to get county boundaries - 48 = FIPS code for TX. google for desired state
counties <- tigris::counties(state = 48) %>%
# make a simple features object
st_as_sf() %>%
# filter desired county/ies. can use name, or 3-digit FIPS code
filter(NAME == "Collin" | NAME == "Dallas" | NAME == "Fort Bend" | NAME == "Tarrant" | NAME == "Travis") %>%
# transform to our chosen CRS
st_transform(crs = wgs84) %>%
# select only the columns we need and give better names
dplyr::select(GEOID = "GEOID", county = "NAME", geometry = "geometry")
### CENSUS TRACT BOUNDARIES ###
# use tigris to get census tract boundaries for all 5 counties
# specify counties in call here, bc it lots of data, so don't want it loading the whole state
tracts <- tigris::tracts("48", c("085", "113", "157", "439", "453"), cb = T) %>%
# make a simple features object
st_as_sf() %>%
# transform to our chosen CRS
st_transform(crs = wgs84) %>%
# rename FIPS codes to county names
mutate(county = case_when(
COUNTYFP == "085" ~ "Collin",
COUNTYFP == "113" ~ "Dallas",
COUNTYFP == "157" ~ "Fort Bend",
COUNTYFP == "439" ~ "Tarrant",
COUNTYFP == "453" ~ "Travis")) %>%
# select only the columns we need and give better names
dplyr::select(county = "county", GEOID = "GEOID", geometry = "geometry")
The algorithm will scan over every cell of this grid and extract ‘places’ from the Mapzen API. Adjust the ‘cellsize’ argument for different resolutions. Larger cell size will give bigger places (e.g., cities and towns). 0.02 works well for neighborhood-level resolution.
## Make a variable-sized hex grid across one or all counties
# size is variable -- 0.02 seems like goo resolution
hexes <- counties %>%
# can do one or multiple counties
filter(county == "Travis") %>%
# 0.02 works well for neighborhoods in cities. Consider trying other resolutions for different purposes
st_make_grid(cellsize = .02, square = FALSE) %>%
# make an sf object
st_sf() %>%
# add a cell id for bookkeeping purposes
mutate(cell_id = row_number())
## Map hexes
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-97.8, 30.3, zoom = 9) %>%
# county boundary
addPolygons(
data = counties,
fill = "white",
fillOpacity = 0,
weight = 2,
color = 'black'
) %>%
# hexes
addPolygons(
data = hexes,
fill = 'blue',
color = 'blue',
weight = 1,
fillOpacity = 0.5,
label = ~paste0(cell_id)
)
Now, loop through every hex grid cell above and extract the ‘places’ within with the Mapzen API. This takes about 10 minutes per county.
## Mapzen API key
# https://cran.r-project.org/web/packages/rmapzen/vignettes/rmapzen-introduction.html
#mz_set_tile_host_nextzen(key = "yourAPIkeyhere")
## Make a function to extract places from a bound box
get_vector_tiles <- function(bbox){
mz_box = mz_rect(bbox$xmin,bbox$ymin,bbox$xmax,bbox$ymax)
mz_vector_tiles(mz_box)
}
## Loop through all hexes and extract places
# for each row of hexes (sf object made above)
for (i in 1:nrow(hexes)) {
# subset data by row
row <- hexes[i,]
# make a bound box
bounds <- extent(row)
lon_min = as.numeric(bounds@xmin)
lon_max = as.numeric(bounds@xmax)
lat_min = as.numeric(bounds@ymin)
lat_max = as.numeric(bounds@ymax)
bbox <- st_bbox(c(xmin = lon_min, xmax = lon_max, ymax = lat_max, ymin = lat_min), crs = st_crs(4326))
# pull vector tiles for bounding box using function above
vector_tiles <- get_vector_tiles(bbox)
# some hexes don't have places-- write logic to fill these with NA
# if no places...
if (length(vector_tiles[["places"]][["features"]]) == 0) {
# make a df with NA for all places-associated fields
places <- st_sf(kind = NA, name = NA, geometry = NA,
cell_id = row$cell_id,
# needs a geo col to make an sf, so need to keep this
cell_geo = row$geometry)
# if places...
} else {
# extract places and make a df with cell info
places <- as_sf(vector_tiles$places) %>%
dplyr::select(kind, name, geometry) %>%
dplyr::mutate(cell_id = row$cell_id,
cell_geo = row$geometry)
}
# monitor progress
#print(row$cell_id)
# make/add to df
if (i == 1) {
combined <- places
} else {
combined <- rbind(combined, places)
}
}
Clean and save places simple features object
Make a csv with the census tracts. Copy to an excel workbook– this is where you will populate the crosswalk.
## Pull census tract names for selected counties to make neighborhood crosswalk
tracts_for_crosswalk <- tracts %>%
# filter county/ies of interest
filter(county == "Travis") %>%
# order by tract ID to make things easier later
arrange(county, GEOID) %>%
# helpful to pull out last 4 digits-- easier for searching/matching later
mutate(tract = str_sub(GEOID, 8, 11),
neighborhood = NA)
# remove geometry
tracts_for_crosswalk$geometry <- NULL
# save to csv
# ** HIGHLY recommend copying contents to excel file and populating there so you don't accidently overwrite
write.csv(tracts_for_crosswalk, "tracts_for_crosswalk.csv")
Build a map with census tracts and ‘places’ extracted from mapzen. You’ll use this to build the crossalk between census tract and neighborhood.
## Load places, tract, and county data
# if any of these contain multiple counties, filter to do one county at a time
# the map used to build crosswalk has a lot of data and won't render with multiple counties
p <- readRDS("places.rds") %>%
st_as_sf(crs = wgs84) %>%
st_join(counties) %>%
filter(county == "Travis")
t <- tracts %>%
filter(county == "Travis") %>%
mutate(tract = substr(GEOID, 8, 11))
c <- counties %>%
filter(county == "Travis")
## Map places over census tracts
places_map <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-97.8, 30.3, zoom = 9) %>%
# county boundary
addPolygons(
data = c,
fill = "white",
fillOpacity = 0,
weight = 2,
color = 'black'
) %>%
# census tracts, permanent 4-digit label
# 4-digit code, not a popup, makes it easier to build crosswalk
addPolygons(
data = t,
fill = 'white',
fillOpacity = 0,
weight = 1.5,
color = 'blue',
label = ~paste0(tract),
labelOptions = labelOptions(noHide = T, direction = 'middle', textOnly = T, textsize = 14)
) %>%
# census tracts, popup full census tract label
# sometimes there are replicate 4-digit ('tract' in df). have full tract available
addPolygons(
data = t,
fill = 'white',
fillOpacity = 0,
weight = 1.5,
color = 'blue',
label = ~paste0(GEOID)
) %>%
# places -- mapped without boundaries, text only
addLabelOnlyMarkers(
data = p,
label = ~as.character(name),
labelOptions = labelOptions(noHide = T, direction = 'top', textOnly = T, textsize = 16))
## Save as html file
saveWidget(places_map, file = "tracts_places_map.html")
Open tracts_places_map.html and crosswalk.xlsx (copied from tracts_for_crosswalk.csv). Make column “neighborhood” in crosswalk.xlsx and populate with “places” from the map. Assign a neighborhood to each census tract. Multiple census tracts can/should comprise a neighborhood. Some hints:
The crosswalk assigns each census tract ID to a neighborhood. Now, we need to add back in the census tract geometry and summarize to dissolve the inner boundaries, where multiple census tracts comprise a neighborhood. This will leave us with the names and outlines of the neighborhoods. Save this as an sf object
# Read in crosswalk and summarize by neighborhood
n <- read.csv("tract_neighborhood_crosswalk.csv", header = T) %>%
mutate(GEOID = as.character(GEOID)) %>%
# join with tracts-- this brings geometry to each tract
left_join(tracts) %>%
st_sf() %>%
# group by neighborhood
group_by(neighborhood) %>%
# dissolves interior boundaries, keep county name
summarize(county = first(county))
# Save as R data file
saveRDS(n, 'neighborhoods.rds')
See what you’ve made, share with locals, and make any necessary changes to the crosswalk.
# Read in crosswalk
crosswalk <- read.csv("tract_neighborhood_crosswalk.csv")
# Add a variable to map to color
neighborhoods <- n %>%
mutate(category = factor(sample.int(12L, nrow(.), TRUE)))
# Join with tract shapefile
data2 <- tracts %>%
mutate(GEOID = as.numeric(GEOID)) %>%
left_join(crosswalk) %>%
st_join(neighborhoods)
# make palettes
set.seed(999)
pc1 <- colorFactor(rainbow(12), neighborhoods$category)
## Map places over census tracts
n_map <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-97.7, 30.27, zoom = 9) %>%
addPolygons(
data = counties,
fill = "white",
fillOpacity = 0,
weight = 2,
color = 'black'
) %>%
addPolygons(
data = tracts,
color = "black",
stroke = T,
fillOpacity = 0,
weight = 0.7,
dashArray = "3"
) %>%
addPolygons(
data = neighborhoods,
color = ~pc1(category),
stroke = T,
fillOpacity = 0.4,
weight = 1.5
) %>%
addPolygons(
data = neighborhoods,
fill = "white",
fillOpacity = 0,
weight = 1.5,
color = 'black',
label = ~paste0(neighborhood)
)
n_map