Ben Bellman
December 5, 2017
Talk with one or two neighbors, jot down ideas:
Legal definition of ownership and usage rights
library(tidyverse)
library(readxl)
library(rgdal)
library(ggmap)
nyc_sqf <- read_xlsx("~/Google Drive/Computer Backup/R Workshop/Data/2014_sqf_web.xlsx")
select(nyc_sqf, xcoord, ycoord) %>%
summary()
xcoord ycoord
Min. : 914151 Min. :121420
1st Qu.: 990886 1st Qu.:176014
Median :1005210 Median :192836
Mean :1005444 Mean :198626
3rd Qu.:1023191 3rd Qu.:221223
Max. :1066574 Max. :271882
NA's :1650 NA's :1650
lon_lat <- nyc_sqf %>%
select(xcoord, ycoord) %>%
as.matrix() %>%
project(proj = "+proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs", inv = TRUE)
nyc_sqf$lon <- lon_lat[,1]
nyc_sqf$lat <- lon_lat[,2]
select(nyc_sqf, lon, lat) %>%
summary()
lon lat
Min. :-74.25 Min. :40.50
1st Qu.:-73.98 1st Qu.:40.65
Median :-73.92 Median :40.70
Mean :-73.92 Mean :40.71
3rd Qu.:-73.86 3rd Qu.:40.77
Max. :-73.70 Max. :40.91
NA's :1650 NA's :1650
ggmap is an extension of ggplot2 library for graphicsget_map(location = "new york",
maptype = "satellite",
zoom = 11) %>%
ggmap() +
coord_cartesian() +
geom_point(data = nyc_sqf, aes(lon, lat),
alpha = 0.05, color = "#ce1c1c", na.rm = TRUE)
library(tmap)
data(Europe, land, rivers, metro)
tm_shape(land) +
tm_raster("trees", breaks=seq(0, 100, by=20), legend.show = FALSE) +
tm_shape(Europe, is.master = TRUE) +
tm_borders() +
tm_shape(rivers) +
tm_lines(lwd="strokelwd", scale=5, legend.lwd.show = FALSE) +
tm_shape(metro) +
tm_bubbles("pop2010", "red", border.col = "black", border.lwd=1,
size.lim = c(0, 11e6), sizes.legend = c(1e6, 2e6, 4e6, 6e6, 10e6),
title.size="Metropolitan Population") +
tm_text("name", size="pop2010", scale=1, root=4, size.lowerbound = .6,
bg.color="white", bg.alpha = .75,
auto.placement = 1, legend.size.show = FALSE) +
tm_format_Europe() +
tm_style_natural()
library(sp)
nyc_sqf <- filter(nyc_sqf, is.na(lon) == F)
coordinates(nyc_sqf) <- select(nyc_sqf, lon, lat)
class(nyc_sqf)
[1] "SpatialPointsDataFrame"
attr(,"package")
[1] "sp"
slotNames(nyc_sqf)
[1] "data" "coords.nrs" "coords" "bbox" "proj4string"
plot(nyc_sqf)
spatstat has many tools to analyze point patternslibrary(spatstat)
library(maptools)
sample <- nyc_sqf[nyc_sqf$datestop==1092014,] %>% as.ppp()
window <- convexhull(sample) %>% as.owin()
sample[window] %>%
envelope(fun = Kest) %>%
plot(main = "Clustering in one day of NYPD stops")
Generating 99 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
Done.
sample[window] %>%
density() %>%
plot(main = "Heat map of stops for one day")
sa <- readOGR("/Users/benjaminbellman/Dropbox (Brown)/Police/Data/Final Files", "SanAntonio_1015")
OGR data source with driver: ESRI Shapefile
Source: "/Users/benjaminbellman/Dropbox (Brown)/Police/Data/Final Files", layer: "SanAntonio_1015"
with 366 features
It has 81 fields
Integer64 fields read as strings: REGIONA DIVISIONA STATEA COUNTYA COUSUBA PLACEA TRACTA CONCITA AIANHHA RES_ONLYA TRUSTA AITSCEA ANRCA CBSAA CSAA METDIVA NECTAA CNECTAA NECTADIVA UAA CDCURRA SLDUA SLDLA ZCTA5A SUBMCDA SDELMA SDSECA SDUNIA PUMA5A BTTRA BLKGRPA BTBGA medhhinc totpop white black asian hisp nonwhite Rowid_ ZONE_CODE COUNT
qtm(sa, fill="divseg", fill.title="")
library(spdep)
library(classInt)
weights <- poly2nb(sa) %>% nb2listw()
locm <- localmoran(sa$gini, weights)
sa$Sgini <- scale(sa$gini)
sa$lag <- lag.listw(weights, sa$Sgini)
sa$pval <- locm[, 5]
sa$quad_sig <- ifelse(sa$Sgini >= 0 & sa$lag >= 0 & sa$pval <= 0.05, 1,
ifelse(sa$Sgini <= 0 & sa$lag <= 0 & sa$pval <= 0.05, 2,
ifelse(sa$Sgini >= 0 & sa$lag <= 0 & sa$pval <= 0.05, 3,
ifelse(sa$Sgini >= 0 & sa$lag <= 0 & sa$pval <= 0.05, 4, 5))))
breaks <- seq(1, 5, 1)
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")
np <- findInterval(sa$quad_sig, breaks)
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
plot(sa, col = colors[np])
mtext("Local Moran's I\nof Gini Index", cex = 1.5, side = 3, line = 1)
legend("topright", legend = labels, fill = colors, bty = "n")