Here is the code:
# load libraries
library(plyr)
library(tidyverse)
library(purrr)
library(sf)
library(sp)
library(spatial)
library(ggspatial)
freq<-plyr::count
# get shape file from Moco Open Data
tempfile("cbg",fileext = ".zip")
## [1] "/tmp/Rtmp4ZtHuF/cbg655941f45fbc.zip"
unlink(paste(tempdir(),"/geo_export*.*",collapse = "",sep=""))
zip<-paste(tempdir(),"/cbg.zip",collapse="",sep="")
download.file("https://data.montgomerycountymd.gov/api/geospatial/y2z8-tsry?method=export&format=Shapefile",zip)
unzip(zipfile = zip,exdir = tempdir())
fn<-paste(tempdir(),"/",list.files(path=tempdir(),pattern = "^geo_export[^.]+.shp$"),collapse = "",sep = "")
censusBlockGroups<-st_read(fn,stringsAsFactors = FALSE)
## Reading layer `geo_export_72c2c416-c1bf-4e6b-bea0-46485f4130e7' from data source `/tmp/Rtmp4ZtHuF/geo_export_72c2c416-c1bf-4e6b-bea0-46485f4130e7.shp' using driver `ESRI Shapefile'
## Simple feature collection with 614 features and 33 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -77.52769 ymin: 38.93434 xmax: -76.88851 ymax: 39.3535
## epsg (SRID): 4326
## proj4string: +proj=longlat +ellps=WGS84 +no_defs
censusBlockGroups<- st_transform(
censusBlockGroups, "+proj=longlat +ellps=WGS84 +datum=WGS84"
)
censusBlockGroups$longitude<-as.numeric(censusBlockGroups$longitude)
censusBlockGroups$latitude<-as.numeric(censusBlockGroups$latitude)
# compute the bounding boxes of each census block group and bind columns
bb<-as.data.frame.matrix(sapply(censusBlockGroups$geometry,st_bbox))
censusBlockGroups<-cbind(censusBlockGroups,t(bb))
# Function to return the index of census block that intersects the location
insideCensusBlockGroup<-function(lon,lat) {
# identify block group bounding boxes that enclose lon and lat
insideBgBox<-function(lon,lat) {
a<-(
lon>=censusBlockGroups$xmin &
lat>=censusBlockGroups$ymin &
lon<=censusBlockGroups$xmax &
lat<=censusBlockGroups$ymax
)
list(which(a))
}
# for all locations that fall into multiple bboxes
# pick the first polygon that fits
# assumes polygons do not overlap
insideBgPoly<-function(possibles,lon,lat) {
if (length(possibles)==1) return(list(possibles))
loc<-st_sfc(
crs="+proj=longlat +ellps=WGS84 +datum=WGS84",
# crs="+proj=longlat +ellps=WGS84 +no_defs",
st_multipoint(
as.matrix(cbind(lon,lat)))
)
# loc<-st_as_sf(
# data.frame(longitude=lon,latitude=lat),
# coords=c("longitude","latitude"),
# crs="+proj=longlat +ellps=WGS84 +no_defs"
# )
innies<-unlist(st_intersects(loc,censusBlockGroups$geometry[possibles]))
i<-length(innies)
# browser(expr = i==0)
if (i==1) return(list(possibles[1]))
if (i==0) return(list(NA))
# shouldn't be more than 1, but choose random
list(possibles[1+round(runif(i-1))])
}
bb<-mapply(insideBgBox,lon,lat)
bb<-mapply(insideBgPoly,bb,lon,lat)
unlist(bb)
}
# see how well function insideCensusBlockGroup categorizes it's centroids:
d<-insideCensusBlockGroup(censusBlockGroups$longitude,censusBlockGroups$latitude)
# if correct summary should be true for all block groups:
summary(d==(1:614))
## Mode FALSE TRUE
## logical 235 379
# command line hacking:
# test inside block group
getMeanPolygon<-function(pg) {
pts<-eval(parse(text=as.character(pg)))
lon<-(pts[pts<0])[1]
lat<-(pts[pts>0])[1]
# lon<-mean(pts[pts<0])
# lat<-mean(pts[pts>0])
return(list(longitude=lon,latitude=lat))
}
meanBg<-sapply(FUN=getMeanPolygon,censusBlockGroups$geometry)
meanBg<-as.data.frame.matrix(t(meanBg))
# Try just using the spatial functions:
a<-cbind(censusBlockGroups$longitude,censusBlockGroups$latitude)
mpts<-as.matrix(a)
npts<-st_sfc(st_multipoint(mpts), crs = 4326)
#b<-mapply(st_intersects,npts,censusBlockGroups$geometry)
d<-st_intersects(npts,censusBlockGroups$geometry)
#see if d correctly assigns intersections to correct row:
summary(d[[1]]==(1:614))
## Mode TRUE
## logical 614
# plot census tract 700710.0
ctr700710<-grep("700710.0",censusBlockGroups$tract)
ggtr<-ggplot(data=censusBlockGroups[ctr700710,]) +
geom_sf() +
layer_spatial(st_cast(npts,"POINT")[ctr700710])+
coord_sf(expand = FALSE)
# coord_sf(xlim = c(-102.15, -74.12), ylim = c(7.65, 33.97), expand = FALSE)
ggtr
