suppressPackageStartupMessages({
library(rgdal)
library(rgeos)
library(dplyr)
library(stringr)
})
#' To download and extract a zipped file (that may not have .zip at the end of the URL)
unzip_url <- function (url, destfile, exdir, quiet = FALSE) {
if (missing(destfile)) destfile <- basename(url)
if (missing(exdir)) exdir <- file.path(getwd(), sub("\\.zip$", "", destfile))
zipfile <- file.path(exdir, destfile)
if (!file.exists(zipfile)) {
dir.create(exdir, recursive = TRUE)
download.file(url, zipfile)
}
if (!quiet) message("Unzipping into: ", exdir)
contents <- unzip(zipfile, exdir = exdir)
return(exdir)
}
#' To extract a layer from an (unzipped) shapefile
extract_layer <- function (dsn, layer, datum, quiet = FALSE, ...) {
if (missing(layer)) layer <- basename(dsn)
msg <- capture.output(spobj <- readOGR(normalizePath(dsn), layer, ...))
if (!quiet) {
message(msg)
message(str(spobj@data))
}
if (!missing(datum)) spobj <- spTransform(spobj, datum)
return(spobj)
}
CA_shapefile <- unzip_url(
"http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_06_tract_500k.zip",
quiet = TRUE)
CA_tracts <- extract_layer(
CA_shapefile,
quiet = TRUE)
CARE_shapefile <- unzip_url(
"http://www.baaqmd.gov/~/media/Files/Planning and Research/CARE Program/Documents/Revised_2013_Impacted_Boundary.ashx",
destfile = "Revised_2013_Impacted_Boundary.zip",
quiet = TRUE)
CARE_polygons <- extract_layer(
CARE_shapefile,
datum = CRS(proj4string(CA_tracts)), # NAD83 longlat
quiet = TRUE)
filter_features <- function (spgeom1, spgeom2, FUN=gIntersects, ...) {
ij <- FUN(spgeom2, spgeom1, byid = TRUE, ...)
i <- which(as.logical(rowSums(ij)))
return(spgeom1[i,])
}
# Narrow down the set of candidate tracts ...
candidate_tracts <- filter_features(CA_tracts, gConvexHull(CARE_polygons))
# ... then be exact
CARE_tracts <- filter_features(candidate_tracts, CARE_polygons)
whole_tracts <- filter_features(candidate_tracts, CARE_polygons, gContains)
CARE boundaries fully or partly enclose 608 tracts. Of these, 338 tracts lie wholly inside a CARE region.
shade_polygons <- function (spobj, col = gray(0.6), add = TRUE, ...) {
plot(spobj, col = col, border = NA, add = add, ...)
}
draw_outlines <- function (spobj, border = "#BB0000", add = TRUE, ...) {
plot(spobj, border = border, add = add, ...)
}
par(mar = c(0, 0, 0, 0))
shade_polygons(CARE_tracts, gray(0.8), add = FALSE)
shade_polygons(whole_tracts)
draw_outlines(CARE_polygons)
`%not_in%` <- Negate(`%in%`)
fragmented_tracts <- subset(CARE_tracts, GEOID %not_in% whole_tracts$GEOID)
area <- function (spobj, datum, ...) UseMethod("area")
area.SpatialPolygons <- area.SpatialPolygonsDataFrame <- function (spobj, datum, ...) {
if (!missing(datum)) spobj <- spTransform(spobj, datum)
stopifnot(is.projected(spobj))
areas <- sapply(spobj@polygons, function(x) x@area)
names(areas) <- sapply(spobj@polygons, function(x) x@ID)
return(areas)
}
tabulate_areas <- function (...) {
data.frame(km2 = area(...) / 1.0e6) %>% as.tbl()
}
UTM10 <- CRS(" +proj=utm +zone=10 ellps=WGS84")
area_tbl <- rbind(cbind(tabulate_areas(fragmented_tracts, UTM10), Kind = "fragmented"),
cbind(tabulate_areas(whole_tracts, UTM10), Kind = "whole")) %>% as.tbl()
GM <- function (x) exp(mean(log(x))) # geometric mean
N <- function (x) sum(!is.na(x))
area_tbl %>%
group_by(Kind) %>%
summarise_each(funs(N, min, mean, median, GM, max)) %>%
format(digits = 2)
## Kind N min mean median GM max
## 1 fragmented 270 0.173 4.46 1.55 1.8 122.0
## 2 whole 338 0.057 0.97 0.76 0.7 6.9
inner_fragments <- gIntersection(CARE_polygons, fragmented_tracts, byid = TRUE)
fragment_tbl <- tabulate_areas(inner_fragments, UTM10)
fragment_tbl$tract_FID <- str_split_fixed(row.names(fragment_tbl), " ", n=2)[,2]
fragment_tbl <- fragment_tbl %>% group_by(tract_FID) %>% summarise(km2 = sum(km2))
tract_tbl <- tabulate_areas(fragmented_tracts, UTM10)
tract_tbl$tract_FID <- row.names(tract_tbl)
merged <- fragment_tbl %>%
inner_join(tract_tbl, by = "tract_FID") %>%
mutate(km2_frac = pmin(1.0, km2.x / km2.y))
merged %>% group_by(inner_fraction = cut(km2_frac, seq(0, 1, len=11))) %>% summarise(N = n())
## Source: local data frame [10 x 2]
##
## inner_fraction N
## 1 (0,0.1] 89
## 2 (0.1,0.2] 3
## 3 (0.2,0.3] 9
## 4 (0.3,0.4] 6
## 5 (0.4,0.5] 9
## 6 (0.5,0.6] 8
## 7 (0.6,0.7] 11
## 8 (0.7,0.8] 10
## 9 (0.8,0.9] 14
## 10 (0.9,1] 111
par(mar=c(0, 0, 0, 0))
shade_polygons(whole_tracts, add = FALSE)
shade_polygons(fragmented_tracts, gray(0.8))
i <- subset(merged, km2_frac > 0.5)$tract_FID
shade_polygons(fragmented_tracts[i,])
draw_outlines(CARE_polygons)
There are 154 tracts with more than half their area inside the CARE boundaries. If we add these to the wholly enclosed tracts, we arrive at a final estimate of 154 + 338 = 492 tracts within the CARE boundaries.
Using an alternative heuristic, there are 489 tracts whose centroid falls within a CARE region. (Nearly equal.)