Load data

# Read the motor vehicle theft records, keeping text columns as character
# vectors. The argument name is spelled out in full rather than relying on
# partial argument matching.
mvt <- read.csv("mvt.csv", stringsAsFactors = FALSE)
# Parse the timestamps (month/day/two-digit year hour:minute). strptime()
# returns POSIXlt, whose `hour` component is read directly below.
mvt$Date <- strptime(mvt$Date, "%m/%d/%y %H:%M")
# Name of the weekday on which each theft happened.
# NOTE(review): weekdays() returns names in the session locale; the factor
# levels used later in this file are English - confirm the locale matches.
mvt$Weekday <- weekdays(mvt$Date)
# Hour of day taken from the POSIXlt representation.
mvt$Hour <- mvt$Date$hour

Graphs

First attempt at a breakdown of crimes by day of the week

library(ggplot2)

# Tabulate thefts per weekday; as.data.frame() on the table yields the
# columns Var1 (weekday) and Freq (count).
mvtByWeekday <- as.data.frame(table(mvt$Weekday))
# group = 1 joins all weekdays into one line even though x is discrete.
ggplot(data = mvtByWeekday, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))

Order the Weekday factor levels chronologically

# Turn Weekday into an ordered factor, Sunday first, so tables and plot axes
# follow the calendar week.
mvt$Weekday <- factor(
  mvt$Weekday,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  ),
  ordered = TRUE
)
# Re-tabulate now that the levels are ordered, then redraw with axis labels.
mvtByWeekday <- as.data.frame(table(mvt$Weekday))
ggplot(data = mvtByWeekday, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1)) +
  labs(x = "Day of week", y = "Motor vehicle theft")

Break down by week days and hours

# Count thefts for every weekday/hour combination
# (Var1 = weekday, Var2 = hour, Freq = count).
mvtDayHour <- as.data.frame(table(mvt$Weekday, mvt$Hour))
# Var2 is a factor; map each entry through its level label to recover the
# numeric hour rather than the internal factor code.
mvtDayHour$Hour <- as.numeric(levels(mvtDayHour$Var2))[mvtDayHour$Var2]
# One coloured line per weekday.
ggplot(data = mvtDayHour, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1, color = Var1))

Generate heat map

# Re-level the weekday column so the week runs Monday through Sunday on the
# heat map's y axis.
mvtDayHour$Var1 <- factor(
  mvtDayHour$Var1,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  ),
  ordered = TRUE
)
# One tile per weekday/hour cell, shaded from white (low) to red (high).
ggplot(data = mvtDayHour, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(
    name = "Total Motor Vehicle Theft",
    low = "white",
    high = "red"
  ) +
  theme(axis.title.y = element_blank())

Load map of Chicago

library(maps)
## Warning: package 'maps' was built under R version 3.1.3
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.1.3
# Fetch a base map for the location "Chicago" at zoom level 11.
chicago <- get_map(location = "Chicago", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
# Draw the base map on its own.
ggmap(chicago)

Plot first 100 motor vehicle thefts on Chicago map

# Overlay the first 100 theft records as points on the base map.
ggmap(chicago) +
  geom_point(
    data = mvt[1:100, ],
    mapping = aes(x = Longitude, y = Latitude)
  )
## Warning: Removed 7 rows containing missing values (geom_point).

Round latitude and longitude to 2 decimal places

# Count thefts per (longitude, latitude) cell after rounding both
# coordinates to two decimals. table() crosses every rounded longitude with
# every rounded latitude, so cells with a count of zero are included.
LatLonCounts <- as.data.frame(
  table(round(mvt$Longitude, 2), round(mvt$Latitude, 2))
)
# Var1/Var2 are factors; convert through character to get the numeric
# coordinates back.
LatLonCounts$Long <- as.numeric(as.character(LatLonCounts$Var1))
LatLonCounts$Lat <- as.numeric(as.character(LatLonCounts$Var2))
# Drop the factor columns, leaving Freq, Long and Lat.
LatLonCounts[c("Var1", "Var2")] <- NULL
str(LatLonCounts)
## 'data.frame':    1638 obs. of  3 variables:
##  $ Freq: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Long: num  -87.9 -87.9 -87.9 -87.9 -87.9 ...
##  $ Lat : num  41.6 41.6 41.6 41.6 41.6 ...

Plot MVT over Chicago

# One point per coordinate cell; both colour and size encode the count,
# with colour running from yellow (low) to red (high).
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  scale_color_gradient(low = "yellow", high = "red")
## Warning: Removed 615 rows containing missing values (geom_point).

Another plot over Chicago, more like a heat map.

# Red tiles over the base map; opacity (alpha) follows the count per cell.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )

Remove all longitude/latitude cells with zero thefts

# Keep only the coordinate cells with at least one theft, then redraw the
# tile map from that subset.
LatLonCounts2 <- subset(LatLonCounts, Freq > 0)
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts2,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )