# Global knitr chunk option: with echo = TRUE the source of every code chunk
# is printed in the rendered document alongside its output.
knitr::opts_chunk$set(echo = TRUE)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

This dataset contains crime records from the Chicago Police Department covering 2012 to 2017.

# Load the raw crime records; text columns stay character vectors here and
# are converted to factors explicitly further down.
crimes_12_to_17_raw <- read.csv(
  file = "Chicago_Crimes_2012_to_2017.csv",
  stringsAsFactors = FALSE
)

# Keep only complete records: a row with an NA in any column is dropped.
crimes_12_to_17 <- na.omit(object = crimes_12_to_17_raw)

# Preview the first six remaining records.
head(x = crimes_12_to_17, n = 6L)
##      X       ID Case.Number                   Date                Block
## 1    3 10508693    HZ250496 05/03/2016 11:40:00 PM   013XX S SAWYER AVE
## 2   89 10508695    HZ250409 05/03/2016 09:40:00 PM   061XX S DREXEL AVE
## 3  197 10508697    HZ250503 05/03/2016 11:31:00 PM  053XX W CHICAGO AVE
## 4  673 10508698    HZ250424 05/03/2016 10:10:00 PM    049XX W FULTON ST
## 5  911 10508699    HZ250455 05/03/2016 10:00:00 PM    003XX N LOTUS AVE
## 6 1108 10508702    HZ250447 05/03/2016 10:35:00 PM 082XX S MARYLAND AVE
##   IUCR           Primary.Type             Description Location.Description
## 1 0486                BATTERY DOMESTIC BATTERY SIMPLE            APARTMENT
## 2 0486                BATTERY DOMESTIC BATTERY SIMPLE            RESIDENCE
## 3 0470 PUBLIC PEACE VIOLATION        RECKLESS CONDUCT               STREET
## 4 0460                BATTERY                  SIMPLE             SIDEWALK
## 5 0820                  THEFT          $500 AND UNDER            RESIDENCE
## 6 041A                BATTERY     AGGRAVATED: HANDGUN               STREET
##   Arrest Domestic Beat District Ward Community.Area FBI.Code X.Coordinate
## 1   True     True 1022       10   24             29      08B      1154907
## 2  False     True  313        3   20             42      08B      1183066
## 3  False    False 1524       15   37             25       24      1140789
## 4  False    False 1532       15   28             25      08B      1143223
## 5  False     True 1523       15   28             25       06      1139890
## 6  False    False  631        6    8             44      04B      1183336
##   Y.Coordinate Year             Updated.On Latitude Longitude
## 1      1893681 2016 05/10/2016 03:56:50 PM 41.86407 -87.70682
## 2      1864330 2016 05/10/2016 03:56:50 PM 41.78292 -87.60436
## 3      1904819 2016 05/10/2016 03:56:50 PM 41.89491 -87.75837
## 4      1901475 2016 05/10/2016 03:56:50 PM 41.88569 -87.74952
## 5      1901675 2016 05/10/2016 03:56:50 PM 41.88630 -87.76175
## 6      1850642 2016 05/10/2016 03:56:50 PM 41.74535 -87.60380
##                        Location
## 1 (41.864073157, -87.706818608)
## 2  (41.782921527, -87.60436317)
## 3 (41.894908283, -87.758371958)
## 4 (41.885686845, -87.749515983)
## 5 (41.886297242, -87.761750709)
## 6 (41.745354023, -87.603798903)

DATA WRANGLING

# Categorical text columns become factors so that summary() and table()
# report per-level counts instead of just "character".
categorical_cols <- c(
  "Primary.Type", "Description", "Location.Description", "IUCR"
)
crimes_12_to_17[categorical_cols] <- lapply(
  crimes_12_to_17[categorical_cols],
  as.factor
)

# Arrest and Domestic hold the strings "True"/"False". Assigning 1/0 into a
# character vector only yields the strings "1"/"0", so the columns would stay
# character. Going through as.logical() gives real integer 0/1 flags; it also
# works if the column was already read as logical, and any value that is not
# a recognised true/false spelling becomes NA instead of lingering as text.
crimes_12_to_17$Arrest <- as.integer(as.logical(crimes_12_to_17$Arrest))
crimes_12_to_17$Domestic <- as.integer(as.logical(crimes_12_to_17$Domestic))

summary(crimes_12_to_17)
##        X                 ID           Case.Number       
##  Min.   :      3   Min.   :   20224   Length:1419591    
##  1st Qu.:2696734   1st Qu.: 8987142   Class :character  
##  Median :3052459   Median : 9576373   Mode  :character  
##  Mean   :3273592   Mean   : 9574958                     
##  3rd Qu.:3409868   3rd Qu.:10191992                     
##  Max.   :6253474   Max.   :10823344                     
##                                                         
##      Date              Block                IUCR       
##  Length:1419591     Length:1419591     0820   :133303  
##  Class :character   Class :character   0486   :128283  
##  Mode  :character   Mode  :character   0460   : 86510  
##                                        0810   : 72957  
##                                        1320   : 71362  
##                                        1310   : 70342  
##                                        (Other):856834  
##           Primary.Type                     Description    
##  THEFT          :322423   SIMPLE                 :147846  
##  BATTERY        :258942   $500 AND UNDER         :133303  
##  CRIMINAL DAMAGE:152813   DOMESTIC BATTERY SIMPLE:128283  
##  NARCOTICS      :131177   TO VEHICLE             : 74572  
##  ASSAULT        : 89508   OVER $500              : 72957  
##  OTHER OFFENSE  : 85362   TO PROPERTY            : 70342  
##  (Other)        :379366   (Other)                :792288  
##                      Location.Description    Arrest         
##  STREET                        :325084    Length:1419591    
##  RESIDENCE                     :223854    Class :character  
##  APARTMENT                     :179444    Mode  :character  
##  SIDEWALK                      :158478                      
##  OTHER                         : 53474                      
##  PARKING LOT/GARAGE(NON.RESID.): 40907                      
##  (Other)                       :438350                      
##    Domestic              Beat         District          Ward      
##  Length:1419591     Min.   : 111   Min.   : 1.00   Min.   : 1.00  
##  Class :character   1st Qu.: 613   1st Qu.: 6.00   1st Qu.:10.00  
##  Mode  :character   Median :1024   Median :10.00   Median :23.00  
##                     Mean   :1150   Mean   :11.26   Mean   :22.86  
##                     3rd Qu.:1711   3rd Qu.:17.00   3rd Qu.:34.00  
##                     Max.   :2535   Max.   :31.00   Max.   :50.00  
##                                                                   
##  Community.Area    FBI.Code          X.Coordinate      Y.Coordinate    
##  Min.   : 0.00   Length:1419591     Min.   :      0   Min.   :      0  
##  1st Qu.:23.00   Class :character   1st Qu.:1152544   1st Qu.:1858762  
##  Median :32.00   Mode  :character   Median :1166022   Median :1891502  
##  Mean   :37.46                      Mean   :1164401   Mean   :1885527  
##  3rd Qu.:57.00                      3rd Qu.:1176363   3rd Qu.:1908713  
##  Max.   :77.00                      Max.   :1205119   Max.   :1951573  
##                                                                        
##       Year       Updated.On           Latitude       Longitude     
##  Min.   :2012   Length:1419591     Min.   :36.62   Min.   :-91.69  
##  1st Qu.:2013   Class :character   1st Qu.:41.77   1st Qu.:-87.72  
##  Median :2014   Mode  :character   Median :41.86   Median :-87.67  
##  Mean   :2014                      Mean   :41.84   Mean   :-87.67  
##  3rd Qu.:2015                      3rd Qu.:41.91   3rd Qu.:-87.63  
##  Max.   :2017                      Max.   :42.02   Max.   :-87.52  
##                                                                    
##    Location        
##  Length:1419591    
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
#install.packages('ggplot2')
library(ggplot2)

# Primary.Type is categorical, so count the rows per level with geom_bar().
# geom_histogram(stat = "count") draws the same bars but makes ggplot2 warn
# that its binning parameters (binwidth, bins, pad) are being ignored.
# coord_flip() puts the crime types on the vertical axis so the long labels
# stay readable.
primary_type <- ggplot(crimes_12_to_17, aes(Primary.Type))
primary_type + geom_bar() + coord_flip()

Top five types of crimes: THEFT (322,423), BATTERY (258,942), CRIMINAL DAMAGE (152,813), NARCOTICS (131,177), and ASSAULT (89,508).

# Smoothed density of the Year column over all retained records.
# NOTE(review): Year only takes whole-year values, so a bar chart of counts
# per year may read better than a kernel density -- confirm intent.
ggplot(data = crimes_12_to_17, mapping = aes(x = Year)) +
  geom_density()

#install.packages('plotrix')
library(plotrix)

# Count the records per Arrest value and label every slice as
# "<value>\n<count>" so the chart shows the absolute numbers.
arrests <- table(crimes_12_to_17$Arrest)
lbls <- paste0(names(arrests), "\n", arrests)
pie3D(arrests, labels = lbls,
      main = "Arrests results (1 = True, 0 = False) from Crimes committed")

# Count the records per Domestic value and label every slice as
# "<value>\n<count>" so the chart shows the absolute numbers.
domestic <- table(crimes_12_to_17$Domestic)
lbls <- paste0(names(domestic), "\n", domestic)
pie(domestic, labels = lbls,
    main = "Domestic results (1 = True, 0 = False) for Crimes committed")

#levels(crimes_12_to_17$IUCR) #353 Levels
# Tabulate once and reuse the counts for both the name list and the plot.
iucr_raw <- table(crimes_12_to_17$IUCR)

# Names of the ten most frequent IUCR codes, in ascending order of count.
top10_iucr <- tail(names(sort(iucr_raw)), 10)

# Plot exactly the ten largest counts. Drawing every level and narrowing the
# view with xlim = c(0, 11) leaves the tenth bar (which spans 11 to 12 with
# the default bar width and spacing) outside the requested range.
barplot(head(sort(iucr_raw, decreasing = TRUE), 10))

#levels(crimes_12_to_17$Description) #340 Levels
# Names of the ten most frequent descriptions, in ascending order of count.
top10_description <- tail(names(sort(table(crimes_12_to_17$Description))), 10)

# Show all ten, most frequent first. head() on the ascending vector printed
# only ranks 10 through 5 and hid the four most common descriptions.
rev(top10_description)
##  [1] "SIMPLE"                       "$500 AND UNDER"
##  [3] "DOMESTIC BATTERY SIMPLE"      "TO VEHICLE"
##  [5] "OVER $500"                    "TO PROPERTY"
##  [7] "POSS: CANNABIS 30GMS OR LESS" "FORCIBLE ENTRY"
##  [9] "AUTOMOBILE"                   "FROM BUILDING"
#levels(crimes_12_to_17$Location.Description) #141 Levels
# Names of the ten most frequent locations, in ascending order of count.
top10_location_description <- tail(names(sort(table(crimes_12_to_17$Location.Description))), 10)

# Show all ten, most frequent first. head() on the ascending vector printed
# only ranks 10 through 5 and hid the four most common locations.
rev(top10_location_description)
##  [1] "STREET"                         "RESIDENCE"
##  [3] "APARTMENT"                      "SIDEWALK"
##  [5] "OTHER"                          "PARKING LOT/GARAGE(NON.RESID.)"
##  [7] "ALLEY"                          "RESIDENTIAL YARD (FRONT/BACK)"
##  [9] "SMALL RETAIL STORE"             "SCHOOL, PUBLIC, BUILDING"
#street, residence, apartment, etc...
#location_description_raw <- table(crimes_12_to_17$Location.Description)
#barplot(location_description_raw[order(location_description_raw, decreasing = TRUE)], xlim = c(0,11))
#scaling the graph is impossible
# TODO: move this factor conversion to the beginning of the report.
crimes_12_to_17$Beat <- as.factor(crimes_12_to_17$Beat)
#levels(crimes_12_to_17$Beat) #289 Levels

# Tabulate once and reuse the counts for both the name list and the plot.
beat_raw <- table(crimes_12_to_17$Beat)

# Names of the ten busiest beats, in ascending order of count.
top10_beat <- tail(names(sort(beat_raw)), 10)

# Plot exactly the ten largest counts instead of drawing every level and
# narrowing the view with xlim, which leaves the tenth bar out of range.
barplot(head(sort(beat_raw, decreasing = TRUE), 10))

# TODO: move this factor conversion to the beginning of the report.
crimes_12_to_17$District <- as.factor(crimes_12_to_17$District)
#levels(crimes_12_to_17$District) #23 Levels

# Tabulate once and reuse the counts for both the name list and the plot.
district_raw <- table(crimes_12_to_17$District)

# Names of the ten busiest districts, in ascending order of count.
top10_district <- tail(names(sort(district_raw)), 10)

# Plot exactly the ten largest counts instead of drawing every level and
# narrowing the view with xlim, which leaves the tenth bar out of range.
barplot(head(sort(district_raw, decreasing = TRUE), 10))

# TODO: move this factor conversion to the beginning of the report.
crimes_12_to_17$Ward <- as.factor(crimes_12_to_17$Ward)
#levels(crimes_12_to_17$Ward) #45 Levels

# Tabulate once and reuse the counts for both the name list and the plot.
ward_raw <- table(crimes_12_to_17$Ward)

# Names of the ten busiest wards, in ascending order of count.
top10_ward <- tail(names(sort(ward_raw)), 10)

# Plot exactly the ten largest counts instead of drawing every level and
# narrowing the view with xlim, which leaves the tenth bar out of range.
barplot(head(sort(ward_raw, decreasing = TRUE), 10))

# TODO: move this factor conversion to the beginning of the report.
crimes_12_to_17$Community.Area <- as.factor(crimes_12_to_17$Community.Area)
#levels(crimes_12_to_17$Community.Area) #67 Levels

# Tabulate once and reuse the counts for both the name list and the plot.
community_area_raw <- table(crimes_12_to_17$Community.Area)

# Names of the ten busiest community areas, in ascending order of count.
top10_community_area <- tail(names(sort(community_area_raw)), 10)

# Plot exactly the ten largest counts instead of drawing every level and
# narrowing the view with xlim, which leaves the tenth bar out of range.
barplot(head(sort(community_area_raw, decreasing = TRUE), 10))

# TODO: move this factor conversion to the beginning of the report.
crimes_12_to_17$FBI.Code <- as.factor(crimes_12_to_17$FBI.Code)
#levels(crimes_12_to_17$FBI.Code) #19 Levels

# Tabulate once and reuse the counts for both the name list and the plot.
fbi_raw <- table(crimes_12_to_17$FBI.Code)

# Names of the ten most frequent FBI codes, in ascending order of count.
top10_fbi_code <- tail(names(sort(fbi_raw)), 10)

# Plot exactly the ten largest counts instead of drawing every level and
# narrowing the view with xlim, which leaves the tenth bar out of range.
barplot(head(sort(fbi_raw, decreasing = TRUE), 10))

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.