In the previous topics on crimes in Chicago, we explored the dataset from data.gov and applied the Pareto principle to select the most relevant types and locations of crimes in Chicago, in order to estimate how crime types and locations differ by day of the week and month, as well as by latitude and longitude.
See https://rpubs.com/alex-lev/248923, https://rpubs.com/alex-lev/249124, https://rpubs.com/alex-lev/249354, https://rpubs.com/alex-lev/249370, https://rpubs.com/alex-lev/249747.
Now we want to estimate the level of criminal violence in Chicago by the longitude and latitude of crimes during daytime and nighttime.
For more about Chicago criminal data see https://catalog.data.gov/dataset/crimes-2001-to-present-398a4.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Read the compressed crime table into memory (this takes about two minutes).
chicago_crime <- readRDS("chicago_crime.rds")
# Number of rows and columns in the loaded table.
dim(chicago_crime)
## [1] 6263265 22
# Column names of the loaded table.
names(chicago_crime)
## [1] "ID" "Case Number" "Date"
## [4] "Block" "IUCR" "Primary Type"
## [7] "Description" "Location Description" "Arrest"
## [10] "Domestic" "Beat" "District"
## [13] "Ward" "Community Area" "FBI Code"
## [16] "X Coordinate" "Y Coordinate" "Year"
## [19] "Updated On" "Latitude" "Longitude"
## [22] "Location"
# Drop every record that has a missing value in any column.
chicago_crime <- na.omit(chicago_crime)

# Parse the 12-hour "MM/DD/YYYY HH:MM:SS AM/PM" stamps exactly once.
# UTC is used only as a wall-clock container: it has no daylight-saving gaps,
# so the recorded hour is kept as written regardless of the timezone of the
# machine running the script.
parsed_time <- strptime(
  chicago_crime$Date,
  format = "%m/%d/%Y %I:%M:%S %p",
  tz = "UTC"
)

# Build dd_hms from the 22 original columns plus the date string and the
# integer hour/minute/second, appended in this order so that MDY, H, M and S
# stay at column positions 23 to 26. The components are read from the parsed
# fields rather than by splitting the printed date-time, whose text form is
# not guaranteed to contain a time part for every row.
dd_hms <- chicago_crime
dd_hms$MDY <- format(parsed_time, "%Y-%m-%d") # calendar date as text
dd_hms$H <- parsed_time$hour                  # hour of day, 0-23
dd_hms$M <- parsed_time$min                   # minute, 0-59
dd_hms$S <- as.integer(parsed_time$sec)       # second, stored as integer

# Keep the full timestamp on the source table as POSIXct; POSIXlt is a list
# of fields and is not a safe data-frame column type.
chicago_crime$Day_time <- as.POSIXct(parsed_time)
The main types of crime in Chicago are THEFT, BATTERY, CRIMINAL DAMAGE, NARCOTICS, OTHER OFFENSE, ASSAULT and BURGLARY, which together account for 80% of all crimes. We reduce the list above to THEFT, BATTERY, NARCOTICS, ASSAULT and BURGLARY, and we also add HOMICIDE to the list.
We’ll compare 2003 and 2016 for various types of crime by latitude and longitude versus daytime and nighttime. For this purpose we display the contours of a 3D density surface in 2D, with one panel per hour of the day.
# THEFT: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only theft records. The MDY date string is dropped by name (it is not
# used by the plots) instead of by a hard-coded column position.
dd_thf <- filter(select(dd_hms, -MDY), `Primary Type` == "THEFT")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_thf_2003 <- filter(dd_thf, Year == "2003")
ggplot(dd_thf_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_thf_2016 <- filter(dd_thf, Year == "2016")
ggplot(dd_thf_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)
# BATTERY: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only battery records. The MDY date string is dropped by name (it is
# not used by the plots) instead of by a hard-coded column position.
dd_bat <- filter(select(dd_hms, -MDY), `Primary Type` == "BATTERY")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_bat_2003 <- filter(dd_bat, Year == "2003")
ggplot(dd_bat_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_bat_2016 <- filter(dd_bat, Year == "2016")
ggplot(dd_bat_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)
# NARCOTICS: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only narcotics records. The MDY date string is dropped by name (it is
# not used by the plots) instead of by a hard-coded column position.
dd_nar <- filter(select(dd_hms, -MDY), `Primary Type` == "NARCOTICS")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_nar_2003 <- filter(dd_nar, Year == "2003")
ggplot(dd_nar_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_nar_2016 <- filter(dd_nar, Year == "2016")
ggplot(dd_nar_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)
# ASSAULT: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only assault records. The MDY date string is dropped by name (it is
# not used by the plots) instead of by a hard-coded column position.
dd_ass <- filter(select(dd_hms, -MDY), `Primary Type` == "ASSAULT")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_ass_2003 <- filter(dd_ass, Year == "2003")
ggplot(dd_ass_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_ass_2016 <- filter(dd_ass, Year == "2016")
ggplot(dd_ass_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)
# BURGLARY: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only burglary records. The MDY date string is dropped by name (it is
# not used by the plots) instead of by a hard-coded column position.
dd_bug <- filter(select(dd_hms, -MDY), `Primary Type` == "BURGLARY")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_bug_2003 <- filter(dd_bug, Year == "2003")
ggplot(dd_bug_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_bug_2016 <- filter(dd_bug, Year == "2016")
ggplot(dd_bug_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)
# HOMICIDE: spatial density of incidents by hour of day, 2003 vs 2016 ----
# Keep only homicide records. The MDY date string is dropped by name (it is
# not used by the plots) instead of by a hard-coded column position.
dd_hom <- filter(select(dd_hms, -MDY), `Primary Type` == "HOMICIDE")

# 2003: contour lines of the 2D kernel density estimate, one panel per hour.
# Columns are referenced by bare name so that filter() and facet_wrap() use
# the data being filtered/plotted rather than a same-named global object.
dd_hom_2003 <- filter(dd_hom, Year == "2003")
ggplot(dd_hom_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same display for comparison.
dd_hom_2016 <- filter(dd_hom, Year == "2016")
ggplot(dd_hom_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)