In the previous posts on crime in Chicago we explored the dataset from data.gov and applied the Pareto principle to select the most relevant types and locations of crimes, in order to estimate how the types and locations of crimes differ by day of the week and by month.
See https://rpubs.com/alex-lev/248923, https://rpubs.com/alex-lev/249124, https://rpubs.com/alex-lev/249354, https://rpubs.com/alex-lev/249370.
Now we want to estimate the level of criminal violence in Chicago by the longitude and latitude of the crimes.
For more about Chicago criminal data see https://catalog.data.gov/dataset/crimes-2001-to-present-398a4.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the compressed data set into memory (loading takes about two minutes)
# and report its dimensions.
chicago_crime <- readRDS("chicago_crime.rds")
dim(chicago_crime)
## [1] 6263265 22
# List the column names of the loaded data.
colnames(chicago_crime)
## [1] "ID" "Case Number" "Date"
## [4] "Block" "IUCR" "Primary Type"
## [7] "Description" "Location Description" "Arrest"
## [10] "Domestic" "Beat" "District"
## [13] "Ward" "Community Area" "FBI Code"
## [16] "X Coordinate" "Y Coordinate" "Year"
## [19] "Updated On" "Latitude" "Longitude"
## [22] "Location"
# Drop every record that has a missing value in any column.
chicago_crime <- na.omit(chicago_crime)

# The loaded data has no `Day_time` column (the column listing shows only
# `Date`), so reading `chicago_crime$Day_time` produced "Unknown column"
# warnings and the calendar fields below were never derived from the real
# timestamp. Parse the timestamp from `Date` instead.
# NOTE(review): the format string assumes `Date` is text such as
# "01/31/2016 11:59:00 PM" - confirm against the actual file.
crime_time <- chicago_crime$Date
if (!inherits(crime_time, c("POSIXt", "Date"))) {
  crime_time <- as.POSIXct(
    as.character(crime_time),
    format = "%m/%d/%Y %I:%M:%S %p",
    # Only wall-clock fields are extracted, so parse and format in UTC to
    # avoid NA values for local times skipped by daylight saving changes.
    tz = "UTC"
  )
}

# The timestamp is kept as a local vector rather than a new column so that the
# column positions used by `chicago_crime[, -23]` stay as they were.
chicago_crime$Day_week <- format(crime_time, "%A") # day of the week
chicago_crime$Month <- format(crime_time, "%B") # month name
chicago_crime$M <- format(crime_time, "%m") # month as two-digit text
chicago_crime$MM <- as.integer(chicago_crime$M) # number of month as integer

# Number of records per year, sorted by year (column 23 is not needed here).
chicago_crime[, -23] %>%
  count(Year) %>%
  arrange(Year)
## # A tibble: 17 × 2
## Year n
## <int> <int>
## 1 2001 3842
## 2 2002 345007
## 3 2003 471993
## 4 2004 467128
## 5 2005 449870
## 6 2006 445495
## 7 2007 435527
## 8 2008 419787
## 9 2009 385830
## 10 2010 368410
## 11 2011 350472
## 12 2012 334396
## 13 2013 304267
## 14 2014 269329
## 15 2015 259608
## 16 2016 250694
## 17 2017 51
The main types of crime in Chicago are THEFT, BATTERY, CRIMINAL DAMAGE, NARCOTICS, OTHER OFFENSE, ASSAULT and BURGLARY, which together account for 80% of all crimes.
We’ll compare 2003 and 2016 to see how various types of crime are distributed by latitude and longitude.
# THEFT: keep only theft records, then split out the two comparison years.
ch_thft <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "THEFT")
ch_thft_2003 <- ch_thft %>% filter(Year == "2003")
ch_thft_2016 <- ch_thft %>% filter(Year == "2016")

# Filled 2-D density contours of theft locations in 2003.
ggplot(ch_thft_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_thft_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# BATTERY: keep only battery records, then split out the two comparison years.
ch_bat <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "BATTERY")
ch_bat_2003 <- ch_bat %>% filter(Year == "2003")
ch_bat_2016 <- ch_bat %>% filter(Year == "2016")

# Filled 2-D density contours of battery locations in 2003.
ggplot(ch_bat_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_bat_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# CRIMINAL DAMAGE: keep only criminal damage records, then split out the two
# comparison years.
ch_crm_dam <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "CRIMINAL DAMAGE")
ch_crm_dam_2003 <- ch_crm_dam %>% filter(Year == "2003")
ch_crm_dam_2016 <- ch_crm_dam %>% filter(Year == "2016")

# Filled 2-D density contours of criminal damage locations in 2003.
ggplot(ch_crm_dam_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_crm_dam_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# NARCOTICS: keep only narcotics records, then split out the two comparison
# years.
ch_nar <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "NARCOTICS")
ch_nar_2003 <- ch_nar %>% filter(Year == "2003")
ch_nar_2016 <- ch_nar %>% filter(Year == "2016")

# Filled 2-D density contours of narcotics locations in 2003.
ggplot(ch_nar_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_nar_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# OTHER OFFENSE: keep only records of that type, then split out the two
# comparison years.
ch_oth_off <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "OTHER OFFENSE")
ch_oth_off_2003 <- ch_oth_off %>% filter(Year == "2003")
ch_oth_off_2016 <- ch_oth_off %>% filter(Year == "2016")

# Filled 2-D density contours of "other offense" locations in 2003.
ggplot(ch_oth_off_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_oth_off_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# ASSAULT: keep only assault records, then split out the two comparison years.
ch_ass <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "ASSAULT")
ch_ass_2003 <- ch_ass %>% filter(Year == "2003")
ch_ass_2016 <- ch_ass %>% filter(Year == "2016")

# Filled 2-D density contours of assault locations in 2003.
ggplot(ch_ass_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_ass_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# BURGLARY: keep only burglary records, then split out the two comparison
# years.
ch_bug <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "BURGLARY")
ch_bug_2003 <- ch_bug %>% filter(Year == "2003")
ch_bug_2016 <- ch_bug %>% filter(Year == "2016")

# Filled 2-D density contours of burglary locations in 2003.
ggplot(ch_bug_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_bug_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# HOMICIDE: keep only homicide records, then split out the two comparison
# years.
ch_hom <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "HOMICIDE")
ch_hom_2003 <- ch_hom %>% filter(Year == "2003")
ch_hom_2016 <- ch_hom %>% filter(Year == "2016")

# Filled 2-D density contours of homicide locations in 2003.
ggplot(ch_hom_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_hom_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)
# WEAPONS VIOLATION: keep only weapons violation records, then split out the
# two comparison years.
ch_wep <- chicago_crime[, -23] %>%
  filter(`Primary Type` == "WEAPONS VIOLATION")
ch_wep_2003 <- ch_wep %>% filter(Year == "2003")
ch_wep_2016 <- ch_wep %>% filter(Year == "2016")

# Filled 2-D density contours of weapons violation locations in 2003.
ggplot(ch_wep_2003, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)

# The same plot for 2016.
ggplot(ch_wep_2016, aes(x = Longitude, y = Latitude)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  facet_wrap(~Year)