# Load the motor vehicle theft records, keeping text columns as character
# vectors. The argument name is spelled out in full (`stringsAsFactors`) rather
# than relying on partial argument matching.
mvt <- read.csv("mvt.csv", stringsAsFactors = FALSE)
# Parse the timestamp text (month/day/two-digit year, hour:minute) into POSIXlt.
# The column is left as POSIXlt because the hour is read from its `hour`
# component below.
mvt$Date <- strptime(mvt$Date, format = "%m/%d/%y %H:%M")
# Weekday name of each record, as returned by weekdays().
mvt$Weekday <- weekdays(mvt$Date)
# Hour of day, taken from the POSIXlt `hour` component.
mvt$Hour <- mvt$Date$hour
# First attempt at a breakdown of crimes by day of the week
library(ggplot2)
# Count records per weekday value. as.data.frame() on a one-way table yields
# the category in `Var1` and its count in `Freq`.
mvtByWeekday <- as.data.frame(table(mvt$Weekday))
# Draw the counts as one connected line; group = 1 puts all the discrete
# x values into a single group so that they are joined.
ggplot(data = mvtByWeekday, mapping = aes(x = Var1, y = Freq, group = 1)) +
  geom_line()
# Put the levels of the Weekday factor in calendar order
# Recode Weekday as an ordered factor whose levels run Sunday through Saturday,
# so that tables and plot axes follow that order.
mvt$Weekday <- ordered(
  mvt$Weekday,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
# Re-tabulate with the ordered factor: `Var1` holds the weekday, `Freq` the count.
mvtByWeekday <- as.data.frame(table(mvt$Weekday))
# Same single-line plot as before, now with axis titles.
ggplot(data = mvtByWeekday, mapping = aes(x = Var1, y = Freq, group = 1)) +
  geom_line() +
  xlab("Day of week") +
  ylab("Motor vehicle theft")
# Break down by day of the week and hour of the day
# Cross-tabulate records by weekday (`Var1`) and hour (`Var2`); `Freq` holds
# the count for each weekday/hour pair.
mvtDayHour <- as.data.frame(table(mvt$Weekday, mvt$Hour))
# `Var2` is a factor of hour labels; convert the labels (not the internal
# integer codes) back to numbers.
mvtDayHour$Hour <- as.numeric(levels(mvtDayHour$Var2))[mvtDayHour$Var2]
# One line per weekday, coloured by weekday.
ggplot(
  data = mvtDayHour,
  mapping = aes(x = Hour, y = Freq, group = Var1, colour = Var1)
) +
  geom_line()
# Generate a heat map
# Re-level the weekday column so that the levels run Monday through Sunday;
# this sets the order of the rows on the heat map's y axis.
mvtDayHour$Var1 <- ordered(
  mvtDayHour$Var1,
  levels = c(
    "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sunday"
  )
)
# Hour on x, weekday on y, tile fill scaled from white (low count) to red
# (high count). The y-axis title is removed.
ggplot(data = mvtDayHour, mapping = aes(x = Hour, y = Var1, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(
    name = "Total Motor Vehicle Theft",
    low = "white",
    high = "red"
  ) +
  theme(axis.title.y = element_blank())
# Load a map of Chicago
# Attach the mapping packages. The `##` lines are output captured from a
# previous run, kept for reference.
library(maps)
## Warning: package 'maps' was built under R version 3.1.3
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.1.3
# Fetch a base map for the location "Chicago" at zoom level 11.
chicago <- get_map(location = "Chicago", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
# Draw the bare base map.
ggmap(chicago)
# Plot the first 100 motor vehicle thefts on the map of Chicago
# Overlay the first 100 theft records on the Chicago base map. head() is used
# instead of mvt[1:100, ] so that a data set with fewer than 100 rows is not
# padded with all-NA rows.
ggmap(chicago) +
  geom_point(data = head(mvt, 100), mapping = aes(x = Longitude, y = Latitude))
## Warning: Removed 7 rows containing missing values (geom_point).
# Round latitude and longitude to two decimal places
# Count records per (longitude, latitude) pair after rounding both coordinates
# to two decimals. The table covers every longitude/latitude combination, so
# pairs with no records appear with Freq = 0.
LatLonCounts <- as.data.frame(
  table(round(mvt$Longitude, 2), round(mvt$Latitude, 2))
)
# Replace the factor columns Var1/Var2 with numeric Long/Lat, converting the
# level labels (not the internal codes). Column order stays Freq, Long, Lat.
LatLonCounts <- data.frame(
  Freq = LatLonCounts$Freq,
  Long = as.numeric(levels(LatLonCounts$Var1))[LatLonCounts$Var1],
  Lat = as.numeric(levels(LatLonCounts$Var2))[LatLonCounts$Var2]
)
# Inspect the resulting structure; the `##` lines are output from a previous run.
str(LatLonCounts)
## 'data.frame': 1638 obs. of 3 variables:
## $ Freq: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Long: num -87.9 -87.9 -87.9 -87.9 -87.9 ...
## $ Lat : num 41.6 41.6 41.6 41.6 41.6 ...
# Plot motor vehicle thefts (MVT) over the map of Chicago
# One point per rounded coordinate pair; both point colour (yellow to red) and
# point size encode the count in `Freq`.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, colour = Freq, size = Freq)
  ) +
  scale_colour_gradient(low = "yellow", high = "red")
## Warning: Removed 615 rows containing missing values (geom_point).
# Another plot over Chicago, more like a heat map
# Red tiles over the base map; tile transparency (alpha) encodes the count in
# `Freq`.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )
# Remove all longitude/latitude cells with zero thefts
# Keep only the coordinate pairs that have at least one record.
LatLonCounts2 <- subset(LatLonCounts, Freq > 0)
# Same red alpha-scaled tile overlay, drawn from the filtered counts.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts2,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )