# Prepare and Load Data ----

# Load the SFPD incident export. Text columns stay character so the factor
# conversions below are explicit and ordered the way the plots need.
crime <- read.csv(
  "./data/SFPD_Incidents_-_from_1_January_2003.csv",
  stringsAsFactors = FALSE
)

# Merge the separate Date and Time columns into one timestamp. Values that do
# not match the format (or that do not exist in the session time zone) parse
# to NA and are removed by the filter below.
crime$datetime <- as.POSIXct(
  paste(crime$Date, crime$Time),
  format = "%m/%d/%Y %H:%M"
)

# order the factors so they sort on plots
crime$DayOfWeek <- factor(
  crime$DayOfWeek,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
crime$Category <- as.factor(crime$Category)

# Keep incidents dated before 2016 whose Y coordinate is below 38
# (presumably Y is latitude and larger values are mis-geocoded -- confirm).
# which() discards rows where the condition is NA; plain logical indexing
# would turn each such row into an all-NA record, which in turn makes the
# later mean(crime$X) / mean(crime$Y) evaluate to NA.
keep <- crime$datetime < as.POSIXct("2016-01-01") & crime$Y < 38
crime <- crime[which(keep), ]
rm(keep)

# Basic Graphics ----

# Tally incidents per weekday and draw the counts with base graphics.
# The bar order follows the factor levels of DayOfWeek.
dayCounts <- table(crime[["DayOfWeek"]])
barplot(
  dayCounts,
  xlab = "Day of week",
  main = "Number of crimes committed by day of week"
)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3

# Weekday bar chart via the qplot() shortcut.
qplot(x = DayOfWeek, data = crime, geom = "bar")

# Same chart with the full ggplot() grammar; the base plot object is stored
# so layers can be added to it separately.
plot.dayOfWeek <- ggplot(data = crime, mapping = aes(x = DayOfWeek))
plot.dayOfWeek + geom_bar()

# Base plot of incident counts per crime category.
plot.category <- ggplot(data = crime, mapping = aes(x = Category))
plot.category + geom_bar()

# Same bars with a title; the x-axis labels are rotated 45 degrees and
# right-aligned.
plot.category +
  geom_bar() +
  ggtitle("Crime frequency by category") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(scales)
# Category bar chart again, this time formatting the y-axis tick labels
# with scales::comma.
plot.category +
  geom_bar() +
  scale_y_continuous(labels = comma) +
  ggtitle("Crime frequency by category") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Time-series Graphics ----

library(lubridate)
## Warning: package 'lubridate' was built under R version 3.2.3
# Truncate every incident timestamp to the first instant of its month so
# incidents can be grouped by calendar month.
crime[["monthround"]] <- as.POSIXct(
  floor_date(crime[["datetime"]], unit = "month")
)
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
## 
##     here
# One row per month; the new `count` column holds the number of incident
# ids (PdId) that fall in that month.
monthlycrime <- ddply(
  crime,
  .(monthround),
  summarize,
  count = length(PdId)
)

# Line chart of the monthly incident counts over time.
time.plot <- ggplot(
  data = monthlycrime,
  mapping = aes(x = monthround, y = count)
)
time.plot + geom_line()

# Split each month stamp into month-number and year factors so the years
# can be overlaid on a shared January-December axis.
monthlycrime[["month"]] <- as.factor(
  strftime(monthlycrime[["monthround"]], "%m")
)
monthlycrime[["year"]] <- as.factor(
  strftime(monthlycrime[["monthround"]], "%Y")
)

# Year-over-year comparison restricted to months from January 2011 onward.
time.yoy <- ggplot(
  data = monthlycrime[monthlycrime$monthround >= "2011-01-01", ],
  mapping = aes(x = month, y = count, group = year)
)
# one thicker line per year, colored by year
time.yoy + geom_line(mapping = aes(color = year), size = 1.2)

# Visualizing Geocoded Data ----

library(ggmap)
## Warning: package 'ggmap' was built under R version 3.2.3
## 
## Attaching package: 'ggmap'
## The following object is masked _by_ '.GlobalEnv':
## 
##     crime
# Center the base map on the mean X / Y of all retained incidents
# (passed to qmap() in X, Y order) at zoom level 12.
xmean <- mean(crime$X)
ymean <- mean(crime$Y)
sfmap <- qmap(
  location = c(xmean, ymean),
  zoom = 12
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.767102,-122.422848&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
# Robbery incidents from 2011 onward; which() skips rows whose condition is
# NA, matching what subset() does.
robbery <- crime[
  which(crime$Category == "ROBBERY" & crime$datetime >= "2011-01-01"),
]

# Every robbery as a faint blue dot so dense areas show up darker.
sfmap +
  geom_point(
    mapping = aes(x = X, y = Y),
    data = robbery,
    color = "blue",
    alpha = .05
  )

# Same points colored by police district; the legend keys are drawn fully
# opaque so they stay readable despite the low point alpha.
sfmap +
  geom_point(
    mapping = aes(x = X, y = Y, color = PdDistrict),
    data = robbery,
    alpha = .1
  ) +
  scale_color_brewer(palette = "Paired") +
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  ggtitle("Robberies in San Francisco, 2011-2015")