Setup

suppressPackageStartupMessages({
  library(rgdal)
  library(rgeos)
  library(dplyr)
  library(stringr)
})
#' To download and extract a zipped file (that may not have .zip at the end of the URL)
unzip_url <- function (url, destfile, exdir, quiet = FALSE) {
  if (missing(destfile)) destfile <- basename(url)
  if (missing(exdir)) exdir <- file.path(getwd(), sub("\\.zip$", "", destfile))
  zipfile <- file.path(exdir, destfile)
  if (!file.exists(zipfile)) {
    dir.create(exdir, recursive = TRUE) 
    download.file(url, zipfile)
  }
  if (!quiet) message("Unzipping into: ", exdir)
  contents <- unzip(zipfile, exdir = exdir)
  return(exdir)
}
#' To extract a layer from an (unzipped) shapefile
extract_layer <- function (dsn, layer, datum, quiet = FALSE, ...) {
  if (missing(layer)) layer <- basename(dsn)
  msg <- capture.output(spobj <- readOGR(normalizePath(dsn), layer, ...))
  if (!quiet) {
    message(msg)
    message(str(spobj@data))
  }
  if (!missing(datum)) spobj <- spTransform(spobj, datum)
  return(spobj)
}

Data

CA_shapefile <- unzip_url(
  "http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_06_tract_500k.zip", 
  quiet = TRUE)

CA_tracts <- extract_layer(
  CA_shapefile, 
  quiet = TRUE)
CARE_shapefile <- unzip_url(
  "http://www.baaqmd.gov/~/media/Files/Planning and Research/CARE Program/Documents/Revised_2013_Impacted_Boundary.ashx", 
  destfile = "Revised_2013_Impacted_Boundary.zip",
  quiet = TRUE)

CARE_polygons <- extract_layer(
  CARE_shapefile, 
  datum = CRS(proj4string(CA_tracts)),  # NAD83 longlat
  quiet = TRUE)

Analysis

filter_features <- function (spgeom1, spgeom2, FUN=gIntersects, ...) {
  ij <- FUN(spgeom2, spgeom1, byid = TRUE, ...)
  i <- which(as.logical(rowSums(ij)))
  return(spgeom1[i,])
}

# Narrow down the set of candidate tracts ... 
candidate_tracts <- filter_features(CA_tracts, gConvexHull(CARE_polygons))

# ... then be exact
CARE_tracts <- filter_features(candidate_tracts, CARE_polygons)
whole_tracts <- filter_features(candidate_tracts, CARE_polygons, gContains)

CARE boundaries fully or partly enclose 608 tracts. Of these, 338 tracts lie wholly inside a CARE region.

shade_polygons <- function (spobj, col = gray(0.6), add = TRUE, ...) {
  plot(spobj, col = col, border = NA, add = add, ...)
}

draw_outlines <- function (spobj, border = "#BB0000", add = TRUE, ...) {
  plot(spobj, border = border, add = add, ...)
}

par(mar = c(0, 0, 0, 0))
shade_polygons(CARE_tracts, gray(0.8), add = FALSE)
shade_polygons(whole_tracts)
draw_outlines(CARE_polygons)

plot of chunk map_tracts

`%not_in%` <- Negate(`%in%`)
fragmented_tracts <- subset(CARE_tracts, GEOID %not_in% whole_tracts$GEOID)
area <- function (spobj, datum, ...) UseMethod("area")

area.SpatialPolygons <- area.SpatialPolygonsDataFrame <- function (spobj, datum, ...) {
  if (!missing(datum)) spobj <- spTransform(spobj, datum)
  stopifnot(is.projected(spobj))
  areas <- sapply(spobj@polygons, function(x) x@area)
  names(areas) <- sapply(spobj@polygons, function(x) x@ID)
  return(areas)
}
tabulate_areas <- function (...) {
  data.frame(km2 = area(...) / 1.0e6) %>% as.tbl()
}

UTM10 <- CRS(" +proj=utm +zone=10 ellps=WGS84") 

area_tbl <- rbind(cbind(tabulate_areas(fragmented_tracts, UTM10), Kind = "fragmented"),
                  cbind(tabulate_areas(whole_tracts, UTM10), Kind = "whole")) %>% as.tbl()

GM <- function (x) exp(mean(log(x))) # geometric mean
N <- function (x) sum(!is.na(x))

area_tbl %>% 
  group_by(Kind) %>% 
  summarise_each(funs(N, min, mean, median, GM, max)) %>% 
  format(digits = 2)
##         Kind   N   min mean median  GM   max
## 1 fragmented 270 0.173 4.46   1.55 1.8 122.0
## 2      whole 338 0.057 0.97   0.76 0.7   6.9
inner_fragments <- gIntersection(CARE_polygons, fragmented_tracts, byid = TRUE)
fragment_tbl <- tabulate_areas(inner_fragments, UTM10)
fragment_tbl$tract_FID <- str_split_fixed(row.names(fragment_tbl), " ", n=2)[,2]
fragment_tbl <- fragment_tbl %>% group_by(tract_FID) %>% summarise(km2 = sum(km2))

tract_tbl <- tabulate_areas(fragmented_tracts, UTM10)
tract_tbl$tract_FID <- row.names(tract_tbl)

merged <- fragment_tbl %>% 
  inner_join(tract_tbl, by = "tract_FID") %>% 
  mutate(km2_frac = pmin(1.0, km2.x / km2.y)) 

merged %>% group_by(inner_fraction = cut(km2_frac, seq(0, 1, len=11))) %>% summarise(N = n())
## Source: local data frame [10 x 2]
## 
##    inner_fraction   N
## 1         (0,0.1]  89
## 2       (0.1,0.2]   3
## 3       (0.2,0.3]   9
## 4       (0.3,0.4]   6
## 5       (0.4,0.5]   9
## 6       (0.5,0.6]   8
## 7       (0.6,0.7]  11
## 8       (0.7,0.8]  10
## 9       (0.8,0.9]  14
## 10        (0.9,1] 111
par(mar=c(0, 0, 0, 0))
shade_polygons(whole_tracts, add = FALSE)
shade_polygons(fragmented_tracts, gray(0.8))
i <- subset(merged, km2_frac > 0.5)$tract_FID
shade_polygons(fragmented_tracts[i,])
draw_outlines(CARE_polygons)

plot of chunk unnamed-chunk-1

Conclusion

There are 154 tracts with more than half their area inside the CARE boundaries. If we add these to the wholly enclosed tracts, we arrive at a final estimate of 154 + 338 = 492 tracts within the CARE boundaries.

Using an alternative heuristic, there are 489 tracts whose centroid falls within a CARE region. (Nearly equal.)