San Fransicso Crime Data Analysis

Loading Libraries and Data

library(ggmap)
library(mapproj)
library(plyr)

Get Map of SF

sf = c(lon = -122.4167, lat = 37.7833)
sf.map = get_map(location = sf, zoom = 12, color = "color")
SF_MAP <- ggmap(sf.map, extent = "panel", maprange = FALSE)
SF_MAP

plot of chunk loadmap

Load crime data

SFPD_Incidents <- read.csv("C:/Users/LukasHalim/Documents/Dropbox/Leada/SFPD_Incidents.csv")

Subset to look at just violent crimes

violent_crime_types <- c("ARSON", "SUICIDE", "ASSAULT", "SEX OFFENSES, FORCIBLE", 
    "ROBBERY")
violent_crimes <- subset(SFPD_Incidents, Category %in% violent_crime_types)
coordinates <- violent_crimes

Loading Shapefile using code found in tutorial: http://www.r-bloggers.com/shapefile-polygons-plotted-on-google-maps-using-ggmap-in-r-throw-some-throw-some-stats-on-that-mappart-2/

library(rgdal)
PD_Districts <- readOGR("C:\\Users\\LukasHalim\\Documents\\Dropbox\\Leada\\sfpd_districts", 
    "sfpd_districts")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\LukasHalim\Documents\Dropbox\Leada\sfpd_districts", layer: "sfpd_districts"
## with 10 features and 3 fields
## Feature type: wkbPolygon with 2 dimensions
PD_Districts_df <- fortify(spTransform(PD_Districts, CRS("+proj=longlat +datum=WGS84")))

Retreiving the center of each district so that the district name can be placed there. Had difficulty retrieving latitude and longitude of shapefile centers. Explanation found here: http://stackoverflow.com/questions/16462290/obtaining-latitude-and-longitude-with-from-spatial-objects-in-r

centers <- coordinates(spTransform(PD_Districts, CRS("+proj=longlat +datum=WGS84")))
DistrictCenters <- cbind(PD_Districts@data, centers[, 1], centers[, 2])
DistrictCenters <- rename(DistrictCenters, c(`centers[, 1]` = "long", `centers[, 2]` = "lat"))
SF_MAP <- SF_MAP + geom_text(data = DistrictCenters, aes(x = long, y = lat, 
    label = DISTRICT, group = NULL), size = 2.5)
SF_MAP <- SF_MAP + geom_polygon(aes(x = long, y = lat, group = group), fill = "grey", 
    size = 0.2, color = "black", data = PD_Districts_df, alpha = 0)

Display Map with police districts from shapefile overlayed

SF_MAP

plot of chunk Display Map with Districts

Get the areas for each PD District Total area is about 45 square miles… pretty close to the figure given on wikipedia

PD_Areas <- sapply(1:10, function(x) (PD_Districts@polygons[[x]]@area))/27878400
sum(PD_Areas)
## [1] 44.95

Divide violent crimes by square mile

violent_crimes_per_sq_mi <- prop.table(table(violent_crimes$PdDistrict)/PD_Areas)
violent_crimes_per_sq_mi <- cbind(violent_crimes_per_sq_mi = as.vector(violent_crimes_per_sq_mi), 
    id = 0:9)

PD_Districts_df <- merge(PD_Districts_df, violent_crimes_per_sq_mi, by = c("id"))
SF_MAP <- SF_MAP + geom_polygon(aes(x = long, y = lat, group = group, alpha = as.factor(violent_crimes_per_sq_mi)), 
    size = 0.1, fill = "red", color = "black", data = PD_Districts_df)
SF_MAP <- SF_MAP + theme(strip.background = element_blank(), strip.text.y = element_blank(), 
    legend.text = element_blank(), legend.title = element_blank(), legend.position = "none")

Display map with higest density of violent crimes shown in red by district

SF_MAP

plot of chunk unnamed-chunk-2

Crimes by Date/Time

Friday looks a bit higher than other nights of the week

SFPD_Incidents$DayOfWeek <- factor(SFPD_Incidents$DayOfWeek, c("Monday", "Tuesday", 
    "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
plot(table(SFPD_Incidents$DayOfWeek), "o")

plot of chunk Crimes variation over time

by_date <- table(SFPD_Incidents$Date)
# Very steep drop towards the end of the month. Perhaps the data is
# incomplete - such a large decline needs some explanation
plot(by_date, type = "o")

plot of chunk By Date

Fewest crimes around 4-6 AM. Most crimes around 6-7 PM.

by_hour <- table(substring(SFPD_Incidents$Time, 1, 2))
plot(by_hour, type = "o")

plot of chunk By Time

Show crime frequency by category (For crimes with frequency > 1%)

crimes <- as.data.frame(table(SFPD_Incidents$Category))
crimes <- crimes[crimes$Freq > nrow(SFPD_Incidents)/200, ]
crimes$Var1 <- with(crimes, factor(crimes$Var1, levels = crimes[order(-Freq), 
    ]$Var1))
ggplot(crimes, aes(x = factor(Var1), y = Freq)) + geom_bar(stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

plot of chunk Crimes By Type

No resolution most frequent in Bayview and Tenderloin

prop.table(table(SFPD_Incidents$PdDistrict, SFPD_Incidents$Resolution == "NONE"), 
    1) * 100
##             
##              FALSE  TRUE
##   BAYVIEW    45.96 54.04
##   CENTRAL    27.64 72.36
##   INGLESIDE  32.99 67.01
##   MISSION    44.53 55.47
##   NORTHERN   33.73 66.27
##   PARK       33.67 66.33
##   RICHMOND   28.98 71.02
##   SOUTHERN   34.45 65.55
##   TARAVAL    28.56 71.44
##   TENDERLOIN 52.31 47.69

Show crime maps by category (for high frequency crime categories)

for (i in unique(crimes$Var1)) {
    print(i)
    crime_subset <- SFPD_Incidents[SFPD_Incidents$Category == i, ]
    print(ggmap(sf.map, extent = "panel", baselayer = ggplot(aes(x = X, y = Y), 
        data = crime_subset), maprange = FALSE) + stat_density2d(aes(x = X, 
        y = Y, fill = ..level.., alpha = ..level..), size = 1, bins = 25, data = crime_subset, 
        geom = "polygon") + scale_alpha(range = c(0.5, 0.75), guide = FALSE) + 
        scale_fill_gradient(limits = c(0, 300)))
}
## [1] "ASSAULT"

plot of chunk Contour Plot for each most frequent crime types

## [1] "BURGLARY"

plot of chunk Contour Plot for each most frequent crime types

## [1] "DRUG/NARCOTIC"

plot of chunk Contour Plot for each most frequent crime types

## [1] "FRAUD"

plot of chunk Contour Plot for each most frequent crime types

## [1] "LARCENY/THEFT"

plot of chunk Contour Plot for each most frequent crime types

## [1] "MISSING PERSON"

plot of chunk Contour Plot for each most frequent crime types

## [1] "NON-CRIMINAL"

plot of chunk Contour Plot for each most frequent crime types

## [1] "OTHER OFFENSES"

plot of chunk Contour Plot for each most frequent crime types

## [1] "ROBBERY"

plot of chunk Contour Plot for each most frequent crime types

## [1] "SUSPICIOUS OCC"

plot of chunk Contour Plot for each most frequent crime types

## [1] "TRESPASS"

plot of chunk Contour Plot for each most frequent crime types

## [1] "VANDALISM"

plot of chunk Contour Plot for each most frequent crime types

## [1] "VEHICLE THEFT"

plot of chunk Contour Plot for each most frequent crime types

## [1] "WARRANTS"

plot of chunk Contour Plot for each most frequent crime types

## [1] "WEAPON LAWS"

plot of chunk Contour Plot for each most frequent crime types