knitr::opts_chunk$set(echo = TRUE)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
This dataset contains information from the Chicago Police Department from 2012 to 2017
crimes_12_to_17_raw <- read.csv("Chicago_Crimes_2012_to_2017.csv", stringsAsFactors = FALSE)
crimes_12_to_17 <- na.omit(crimes_12_to_17_raw)
head(crimes_12_to_17)
## X ID Case.Number Date Block
## 1 3 10508693 HZ250496 05/03/2016 11:40:00 PM 013XX S SAWYER AVE
## 2 89 10508695 HZ250409 05/03/2016 09:40:00 PM 061XX S DREXEL AVE
## 3 197 10508697 HZ250503 05/03/2016 11:31:00 PM 053XX W CHICAGO AVE
## 4 673 10508698 HZ250424 05/03/2016 10:10:00 PM 049XX W FULTON ST
## 5 911 10508699 HZ250455 05/03/2016 10:00:00 PM 003XX N LOTUS AVE
## 6 1108 10508702 HZ250447 05/03/2016 10:35:00 PM 082XX S MARYLAND AVE
## IUCR Primary.Type Description Location.Description
## 1 0486 BATTERY DOMESTIC BATTERY SIMPLE APARTMENT
## 2 0486 BATTERY DOMESTIC BATTERY SIMPLE RESIDENCE
## 3 0470 PUBLIC PEACE VIOLATION RECKLESS CONDUCT STREET
## 4 0460 BATTERY SIMPLE SIDEWALK
## 5 0820 THEFT $500 AND UNDER RESIDENCE
## 6 041A BATTERY AGGRAVATED: HANDGUN STREET
## Arrest Domestic Beat District Ward Community.Area FBI.Code X.Coordinate
## 1 True True 1022 10 24 29 08B 1154907
## 2 False True 313 3 20 42 08B 1183066
## 3 False False 1524 15 37 25 24 1140789
## 4 False False 1532 15 28 25 08B 1143223
## 5 False True 1523 15 28 25 06 1139890
## 6 False False 631 6 8 44 04B 1183336
## Y.Coordinate Year Updated.On Latitude Longitude
## 1 1893681 2016 05/10/2016 03:56:50 PM 41.86407 -87.70682
## 2 1864330 2016 05/10/2016 03:56:50 PM 41.78292 -87.60436
## 3 1904819 2016 05/10/2016 03:56:50 PM 41.89491 -87.75837
## 4 1901475 2016 05/10/2016 03:56:50 PM 41.88569 -87.74952
## 5 1901675 2016 05/10/2016 03:56:50 PM 41.88630 -87.76175
## 6 1850642 2016 05/10/2016 03:56:50 PM 41.74535 -87.60380
## Location
## 1 (41.864073157, -87.706818608)
## 2 (41.782921527, -87.60436317)
## 3 (41.894908283, -87.758371958)
## 4 (41.885686845, -87.749515983)
## 5 (41.886297242, -87.761750709)
## 6 (41.745354023, -87.603798903)
DATA WRANGLING
crimes_12_to_17$Primary.Type <- as.factor(crimes_12_to_17$Primary.Type)
crimes_12_to_17$Description <- as.factor(crimes_12_to_17$Description)
crimes_12_to_17$Location.Description <- as.factor(crimes_12_to_17$Location.Description)
crimes_12_to_17$IUCR <- as.factor(crimes_12_to_17$IUCR)
crimes_12_to_17$Arrest[which(crimes_12_to_17$Arrest == "True")] <- 1
crimes_12_to_17$Arrest[which(crimes_12_to_17$Arrest == "False")] <- 0
crimes_12_to_17$Domestic[which(crimes_12_to_17$Domestic == "True")] <- 1
crimes_12_to_17$Domestic[which(crimes_12_to_17$Domestic == "False")] <- 0
summary(crimes_12_to_17)
## X ID Case.Number
## Min. : 3 Min. : 20224 Length:1419591
## 1st Qu.:2696734 1st Qu.: 8987142 Class :character
## Median :3052459 Median : 9576373 Mode :character
## Mean :3273592 Mean : 9574958
## 3rd Qu.:3409868 3rd Qu.:10191992
## Max. :6253474 Max. :10823344
##
## Date Block IUCR
## Length:1419591 Length:1419591 0820 :133303
## Class :character Class :character 0486 :128283
## Mode :character Mode :character 0460 : 86510
## 0810 : 72957
## 1320 : 71362
## 1310 : 70342
## (Other):856834
## Primary.Type Description
## THEFT :322423 SIMPLE :147846
## BATTERY :258942 $500 AND UNDER :133303
## CRIMINAL DAMAGE:152813 DOMESTIC BATTERY SIMPLE:128283
## NARCOTICS :131177 TO VEHICLE : 74572
## ASSAULT : 89508 OVER $500 : 72957
## OTHER OFFENSE : 85362 TO PROPERTY : 70342
## (Other) :379366 (Other) :792288
## Location.Description Arrest
## STREET :325084 Length:1419591
## RESIDENCE :223854 Class :character
## APARTMENT :179444 Mode :character
## SIDEWALK :158478
## OTHER : 53474
## PARKING LOT/GARAGE(NON.RESID.): 40907
## (Other) :438350
## Domestic Beat District Ward
## Length:1419591 Min. : 111 Min. : 1.00 Min. : 1.00
## Class :character 1st Qu.: 613 1st Qu.: 6.00 1st Qu.:10.00
## Mode :character Median :1024 Median :10.00 Median :23.00
## Mean :1150 Mean :11.26 Mean :22.86
## 3rd Qu.:1711 3rd Qu.:17.00 3rd Qu.:34.00
## Max. :2535 Max. :31.00 Max. :50.00
##
## Community.Area FBI.Code X.Coordinate Y.Coordinate
## Min. : 0.00 Length:1419591 Min. : 0 Min. : 0
## 1st Qu.:23.00 Class :character 1st Qu.:1152544 1st Qu.:1858762
## Median :32.00 Mode :character Median :1166022 Median :1891502
## Mean :37.46 Mean :1164401 Mean :1885527
## 3rd Qu.:57.00 3rd Qu.:1176363 3rd Qu.:1908713
## Max. :77.00 Max. :1205119 Max. :1951573
##
## Year Updated.On Latitude Longitude
## Min. :2012 Length:1419591 Min. :36.62 Min. :-91.69
## 1st Qu.:2013 Class :character 1st Qu.:41.77 1st Qu.:-87.72
## Median :2014 Mode :character Median :41.86 Median :-87.67
## Mean :2014 Mean :41.84 Mean :-87.67
## 3rd Qu.:2015 3rd Qu.:41.91 3rd Qu.:-87.63
## Max. :2017 Max. :42.02 Max. :-87.52
##
## Location
## Length:1419591
## Class :character
## Mode :character
##
##
##
##
#install.packages('ggplot2')
library(ggplot2)
primary_type <- ggplot(crimes_12_to_17, aes(Primary.Type))
primary_type + geom_histogram(stat = "count") + coord_flip()
## Warning: Ignoring unknown parameters: binwidth, bins, pad
Top five types of crimes: Fill this in later
ggplot(crimes_12_to_17, aes(Year)) +
geom_density()
#install.packages('plotrix')
library(plotrix)
arrests <- table(crimes_12_to_17$Arrest)
lbls <- paste(names(arrests), "\n", arrests, sep="")
pie3D(arrests, labels = lbls,
main="Arrests results (1 = True, 0 = False) from Crimes commited ")
domestic <- table(crimes_12_to_17$Domestic)
lbls <- paste(names(domestic), "\n", domestic, sep="")
pie(domestic, labels = lbls,
main="Domestic results (1 = True, 0 = False) for Crimes commited ")
#levels(crimes_12_to_17$IUCR) #353 Levels
top10_iucr <- tail(names(sort(table(crimes_12_to_17$IUCR))), 10)
iucr_raw <- table(crimes_12_to_17$IUCR)
barplot(iucr_raw[order(iucr_raw, decreasing = TRUE)], xlim = c(0,11))
#levels(crimes_12_to_17$Description) #340 Levels
top10_description <- tail(names(sort(table(crimes_12_to_17$Description))), 10)
head(top10_description)
## [1] "FROM BUILDING" "AUTOMOBILE"
## [3] "FORCIBLE ENTRY" "POSS: CANNABIS 30GMS OR LESS"
## [5] "TO PROPERTY" "OVER $500"
#levels(crimes_12_to_17$Location.Description) #141 Levels
top10_location_description <- tail(names(sort(table(crimes_12_to_17$Location.Description))), 10)
head(top10_location_description)
## [1] "SCHOOL, PUBLIC, BUILDING" "SMALL RETAIL STORE"
## [3] "RESIDENTIAL YARD (FRONT/BACK)" "ALLEY"
## [5] "PARKING LOT/GARAGE(NON.RESID.)" "OTHER"
#street, residence, apartment, etc...
#location_description_raw <- table(crimes_12_to_17$Location.Description)
#barplot(location_description_raw[order(location_description_raw, decreasing = TRUE)], xlim = c(0,11))
#scaling the graph is impossible
crimes_12_to_17$Beat <- as.factor(crimes_12_to_17$Beat) #Put this at the beggining of the report
#levels(crimes_12_to_17$Beat) #289 Levels
top10_beat <- tail(names(sort(table(crimes_12_to_17$Beat))), 10)
beat_raw <- table(crimes_12_to_17$Beat)
barplot(beat_raw[order(beat_raw, decreasing = TRUE)], xlim = c(0,11))
crimes_12_to_17$District <- as.factor(crimes_12_to_17$District) #Put this at the beggining of the report
#levels(crimes_12_to_17$District) #23 Levels
top10_district <- tail(names(sort(table(crimes_12_to_17$District))), 10)
district_raw <- table(crimes_12_to_17$District)
barplot(district_raw[order(district_raw, decreasing = TRUE)], xlim = c(0,11))
crimes_12_to_17$Ward <- as.factor(crimes_12_to_17$Ward) #Put this at the beggining of the report
#levels(crimes_12_to_17$Ward) #45 Levels
top10_ward <- tail(names(sort(table(crimes_12_to_17$Ward))), 10)
ward_raw <- table(crimes_12_to_17$Ward)
barplot(ward_raw[order(ward_raw, decreasing = TRUE)], xlim = c(0,11))
crimes_12_to_17$Community.Area <- as.factor(crimes_12_to_17$Community.Area) #Put this at the beggining of the report
#levels(crimes_12_to_17$Community.Area) #67 Levels
top10_community_area <- tail(names(sort(table(crimes_12_to_17$Community.Area))), 10)
community_area_raw <- table(crimes_12_to_17$Community.Area)
barplot(community_area_raw[order(community_area_raw, decreasing = TRUE)], xlim = c(0,11))
crimes_12_to_17$FBI.Code <- as.factor(crimes_12_to_17$FBI.Code) #Put this at the beggining of the report
#levels(crimes_12_to_17$FBI.Code) #19 Levels
top10_fbi_code <- tail(names(sort(table(crimes_12_to_17$FBI.Code))), 10)
fbi_raw <- table(crimes_12_to_17$FBI.Code)
barplot(fbi_raw[order(fbi_raw, decreasing = TRUE)], xlim = c(0,11))
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.