library(ggmap)
library(mapproj)
library(plyr)
Get Map of SF
sf = c(lon = -122.4167, lat = 37.7833)
sf.map = get_map(location = sf, zoom = 12, color = "color")
SF_MAP <- ggmap(sf.map, extent = "panel", maprange = FALSE)
SF_MAP
Load crime data
SFPD_Incidents <- read.csv("C:/Users/LukasHalim/Documents/Dropbox/Leada/SFPD_Incidents.csv")
Subset to look at just violent crimes
violent_crime_types <- c("ARSON", "SUICIDE", "ASSAULT", "SEX OFFENSES, FORCIBLE",
"ROBBERY")
violent_crimes <- subset(SFPD_Incidents, Category %in% violent_crime_types)
coordinates <- violent_crimes
Loading Shapefile using code found in tutorial: http://www.r-bloggers.com/shapefile-polygons-plotted-on-google-maps-using-ggmap-in-r-throw-some-throw-some-stats-on-that-mappart-2/
library(rgdal)
PD_Districts <- readOGR("C:\\Users\\LukasHalim\\Documents\\Dropbox\\Leada\\sfpd_districts",
"sfpd_districts")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\LukasHalim\Documents\Dropbox\Leada\sfpd_districts", layer: "sfpd_districts"
## with 10 features and 3 fields
## Feature type: wkbPolygon with 2 dimensions
PD_Districts_df <- fortify(spTransform(PD_Districts, CRS("+proj=longlat +datum=WGS84")))
Retreiving the center of each district so that the district name can be placed there. Had difficulty retrieving latitude and longitude of shapefile centers. Explanation found here: http://stackoverflow.com/questions/16462290/obtaining-latitude-and-longitude-with-from-spatial-objects-in-r
centers <- coordinates(spTransform(PD_Districts, CRS("+proj=longlat +datum=WGS84")))
DistrictCenters <- cbind(PD_Districts@data, centers[, 1], centers[, 2])
DistrictCenters <- rename(DistrictCenters, c(`centers[, 1]` = "long", `centers[, 2]` = "lat"))
SF_MAP <- SF_MAP + geom_text(data = DistrictCenters, aes(x = long, y = lat,
label = DISTRICT, group = NULL), size = 2.5)
SF_MAP <- SF_MAP + geom_polygon(aes(x = long, y = lat, group = group), fill = "grey",
size = 0.2, color = "black", data = PD_Districts_df, alpha = 0)
SF_MAP
Get the areas for each PD District Total area is about 45 square miles… pretty close to the figure given on wikipedia
PD_Areas <- sapply(1:10, function(x) (PD_Districts@polygons[[x]]@area))/27878400
sum(PD_Areas)
## [1] 44.95
Divide violent crimes by square mile
violent_crimes_per_sq_mi <- prop.table(table(violent_crimes$PdDistrict)/PD_Areas)
violent_crimes_per_sq_mi <- cbind(violent_crimes_per_sq_mi = as.vector(violent_crimes_per_sq_mi),
id = 0:9)
PD_Districts_df <- merge(PD_Districts_df, violent_crimes_per_sq_mi, by = c("id"))
SF_MAP <- SF_MAP + geom_polygon(aes(x = long, y = lat, group = group, alpha = as.factor(violent_crimes_per_sq_mi)),
size = 0.1, fill = "red", color = "black", data = PD_Districts_df)
SF_MAP <- SF_MAP + theme(strip.background = element_blank(), strip.text.y = element_blank(),
legend.text = element_blank(), legend.title = element_blank(), legend.position = "none")
SF_MAP
Friday looks a bit higher than other nights of the week
SFPD_Incidents$DayOfWeek <- factor(SFPD_Incidents$DayOfWeek, c("Monday", "Tuesday",
"Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
plot(table(SFPD_Incidents$DayOfWeek), "o")
by_date <- table(SFPD_Incidents$Date)
# Very steep drop towards the end of the month. Perhaps the data is
# incomplete - such a large decline needs some explanation
plot(by_date, type = "o")
by_hour <- table(substring(SFPD_Incidents$Time, 1, 2))
plot(by_hour, type = "o")
crimes <- as.data.frame(table(SFPD_Incidents$Category))
crimes <- crimes[crimes$Freq > nrow(SFPD_Incidents)/200, ]
crimes$Var1 <- with(crimes, factor(crimes$Var1, levels = crimes[order(-Freq),
]$Var1))
ggplot(crimes, aes(x = factor(Var1), y = Freq)) + geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
prop.table(table(SFPD_Incidents$PdDistrict, SFPD_Incidents$Resolution == "NONE"),
1) * 100
##
## FALSE TRUE
## BAYVIEW 45.96 54.04
## CENTRAL 27.64 72.36
## INGLESIDE 32.99 67.01
## MISSION 44.53 55.47
## NORTHERN 33.73 66.27
## PARK 33.67 66.33
## RICHMOND 28.98 71.02
## SOUTHERN 34.45 65.55
## TARAVAL 28.56 71.44
## TENDERLOIN 52.31 47.69
for (i in unique(crimes$Var1)) {
print(i)
crime_subset <- SFPD_Incidents[SFPD_Incidents$Category == i, ]
print(ggmap(sf.map, extent = "panel", baselayer = ggplot(aes(x = X, y = Y),
data = crime_subset), maprange = FALSE) + stat_density2d(aes(x = X,
y = Y, fill = ..level.., alpha = ..level..), size = 1, bins = 25, data = crime_subset,
geom = "polygon") + scale_alpha(range = c(0.5, 0.75), guide = FALSE) +
scale_fill_gradient(limits = c(0, 300)))
}
## [1] "ASSAULT"
## [1] "BURGLARY"
## [1] "DRUG/NARCOTIC"
## [1] "FRAUD"
## [1] "LARCENY/THEFT"
## [1] "MISSING PERSON"
## [1] "NON-CRIMINAL"
## [1] "OTHER OFFENSES"
## [1] "ROBBERY"
## [1] "SUSPICIOUS OCC"
## [1] "TRESPASS"
## [1] "VANDALISM"
## [1] "VEHICLE THEFT"
## [1] "WARRANTS"
## [1] "WEAPON LAWS"