Introduction

In the previous topics on crimes in Chicago we explored a dataset from data.gov and applied the Pareto principle to select the most relevant types and locations of crimes in Chicago, in order to estimate the differences in types of crimes and locations versus day of the week and month as well as latitude and longitude.

See https://rpubs.com/alex-lev/248923, https://rpubs.com/alex-lev/249124, https://rpubs.com/alex-lev/249354, https://rpubs.com/alex-lev/249370, https://rpubs.com/alex-lev/249747.

Research goal

Now we want to estimate the level of criminal violence in Chicago by crime longitude and latitude during daytime and nighttime.

Data

For more about Chicago criminal data see https://catalog.data.gov/dataset/crimes-2001-to-present-398a4.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)


# Load the compressed crime table; reading it into memory takes about two
# minutes.
chicago_crime <- readRDS("chicago_crime.rds")

# Number of rows and columns.
dim(chicago_crime)
## [1] 6263265      22
# Column names of the raw table.
colnames(chicago_crime)
##  [1] "ID"                   "Case Number"          "Date"                
##  [4] "Block"                "IUCR"                 "Primary Type"        
##  [7] "Description"          "Location Description" "Arrest"              
## [10] "Domestic"             "Beat"                 "District"            
## [13] "Ward"                 "Community Area"       "FBI Code"            
## [16] "X Coordinate"         "Y Coordinate"         "Year"                
## [19] "Updated On"           "Latitude"             "Longitude"           
## [22] "Location"

Preparing data

# Drop every row that has a missing value in any column.
chicago_crime <- na.omit(chicago_crime)

# Parse the 12-hour "MM/DD/YYYY HH:MM:SS AM/PM" stamps in `Date`.
# The time zone is fixed so that parsing does not depend on the machine's
# locale; only the wall-clock fields are used below.
crime_stamp <- strptime(
  chicago_crime$Date,
  format = "%m/%d/%Y %I:%M:%S %p",
  tz = "UTC"
)

# Store the stamp as explicit "YYYY-MM-DD HH:MM:SS" text rather than as a
# POSIXlt column: POSIXlt is a list-based class that does not belong in a data
# frame, and relying on implicit date-time -> character coercion can drop the
# time part for midnight stamps, which would leave H/M/S as NA.
chicago_crime$Day_time <- format(crime_stamp, "%Y-%m-%d %H:%M:%S")

# Split the stamp into a date part (MDY) and a time part (HMS).
dd_hms <- separate(chicago_crime, "Day_time", c("MDY", "HMS"), sep = " ")

# Split the time part into integer hour (H, 24-hour clock), minute (M) and
# second (S).
dd_hms <- separate(dd_hms, "HMS", c("H", "M", "S"), convert = TRUE)

Types of crime

The main types of crime in Chicago are THEFT, BATTERY, CRIMINAL DAMAGE, NARCOTICS, OTHER OFFENSE, ASSAULT and BURGLARY - 80% of all crimes. We reduce the list above to THEFT, BATTERY, NARCOTICS, ASSAULT and BURGLARY. Also we add HOMICIDE to the list.

We’ll compare 2003 and 2016 for various types of crime by latitude and longitude versus daytime and nighttime. For this purpose we display the contours of a 3D density surface in 2D, with one panel per hour of the day.

THEFT

# THEFT records only. Columns are referenced by bare name so the condition is
# evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_thf <- filter(dd_hms[, -23], `Primary Type` == "THEFT")

# 2003: density contours of theft locations, one panel per hour of day (H).
dd_thf_2003 <- filter(dd_thf, Year == "2003")

ggplot(dd_thf_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_thf_2016 <- filter(dd_thf, Year == "2016")

ggplot(dd_thf_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

BATTERY

# BATTERY records only. Columns are referenced by bare name so the condition
# is evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_bat <- filter(dd_hms[, -23], `Primary Type` == "BATTERY")

# 2003: density contours of battery locations, one panel per hour of day (H).
dd_bat_2003 <- filter(dd_bat, Year == "2003")

ggplot(dd_bat_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_bat_2016 <- filter(dd_bat, Year == "2016")

ggplot(dd_bat_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

NARCOTICS

# NARCOTICS records only. Columns are referenced by bare name so the condition
# is evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_nar <- filter(dd_hms[, -23], `Primary Type` == "NARCOTICS")

# 2003: density contours of narcotics locations, one panel per hour of day (H).
dd_nar_2003 <- filter(dd_nar, Year == "2003")

ggplot(dd_nar_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_nar_2016 <- filter(dd_nar, Year == "2016")

ggplot(dd_nar_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

ASSAULT

# ASSAULT records only. Columns are referenced by bare name so the condition
# is evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_ass <- filter(dd_hms[, -23], `Primary Type` == "ASSAULT")

# 2003: density contours of assault locations, one panel per hour of day (H).
dd_ass_2003 <- filter(dd_ass, Year == "2003")

ggplot(dd_ass_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_ass_2016 <- filter(dd_ass, Year == "2016")

ggplot(dd_ass_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

BURGLARY

# BURGLARY records only. Columns are referenced by bare name so the condition
# is evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_bug <- filter(dd_hms[, -23], `Primary Type` == "BURGLARY")

# 2003: density contours of burglary locations, one panel per hour of day (H).
dd_bug_2003 <- filter(dd_bug, Year == "2003")

ggplot(dd_bug_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_bug_2016 <- filter(dd_bug, Year == "2016")

ggplot(dd_bug_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

HOMICIDE

# HOMICIDE records only. Columns are referenced by bare name so the condition
# is evaluated on the same table that is being filtered.
# NOTE(review): column 23 is dropped by position as in the original analysis;
# it appears to be the MDY date column - confirm.
dd_hom <- filter(dd_hms[, -23], `Primary Type` == "HOMICIDE")

# 2003: density contours of homicide locations, one panel per hour of day (H).
dd_hom_2003 <- filter(dd_hom, Year == "2003")

ggplot(dd_hom_2003, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

# 2016: same plot for comparison.
dd_hom_2016 <- filter(dd_hom, Year == "2016")

ggplot(dd_hom_2016, aes(Longitude, Latitude)) +
  stat_density_2d() +
  facet_wrap(~H)

Conclusions

  1. All types of crime have distinct dynamics from 2003 to 2016.
  2. The 2D contours of the 3D density surface for all types of crime show the areas controlled by criminal gangs.
  3. THEFT dynamics from 2003 to 2016 show a low concentration of areas during the nighttime (hours 01-02) and the daytime (hours 12-17).
  4. For BATTERY from 2003 to 2016 we can see quiet hours in the early morning (hours 04-07).
  5. NARCOTICS has very interesting dynamics from 2003 to 2016. We see little activity at night (hours 02-03) and early in the morning (hour 05), and hardly any at all at hour 07. The areas are getting narrower too.
  6. For ASSAULT we see quiet hours at night and in the early morning (hours 02-06).
  7. BURGLARY has very interesting dynamics too. We can see quiet hours from hour 00 to hour 16, but peak hours at 17, 19 and 21. In 2003 we can see peak hours only around midnight (hours 01-02).
  8. HOMICIDE dynamics show peak hours around midnight (hours 01-02), in the morning (hours 07-09) and in the evening (hours 17-19).