library(RCurl)
library(XML)
library(jsonlite)
library(knitr)
library(kableExtra)
library(tidyverse)
library(tidyr)
library(geosphere)
library(leaflet)
library(plyr)
library(tools)
library(prettydoc)
library(ggrepel)

Objective

For this project, I will investigate crime, and crime rates in NYC. The main questions I want to answer are:

  1. Which NYC borough has the highest crime rate?

  2. Which categories of crime are most common in New York City as a whole?

Data Sources

The 2 main data sources I will use for this project can be found on NYCOpenData (https://opendata.cityofnewyork.us/data/).

I will explore the “NYPD Arrests Data (Historic)” dataset, and will pull crime statistics from this set in order to explore crime rates in NYC. I will also use the “New York City Population by Borough, 1950 - 2040” dataset in order to calculate crime rates per New York City borough.

The datasets can be found at the following locations:

NYPD Arrests Data (Historic) - https://data.cityofnewyork.us/Public-Safety/NYPD-Arrests-Data-Historic-/8h9b-rp9u

New York City Population by Borough, 1950 - 2040 - https://data.cityofnewyork.us/City-Government/New-York-City-Population-by-Borough-1950-2040/xywu-7bv9

Import the Datasets.

The NYPD Arrests Data (Historic) csv file is 2.05 GB, so I decided to obtain the data via NYCOpenData’s JSON endpoint to save on memory. The New York City Population by Borough, 1950 - 2040 csv file is only 4 KB, but for consistency, I also imported this dataset via the JSON endpoint.

# Pull both datasets from the NYC Open Data JSON endpoints.
# NOTE(review): the resource id used for the crime data (qgea-i56i) is not the
# id in the "NYPD Arrests Data (Historic)" link cited above (8h9b-rp9u), and
# the fields returned are complaint fields (cmplnt_num, cmplnt_fr_dt, ...).
# Confirm that this is the intended dataset.
# NOTE(review): dim() below reports only 1000 rows, so this request returns a
# small slice of the data - presumably the endpoint's default page size.
# Confirm, and raise the limit if the full dataset is wanted.
crime_data <- jsonlite::fromJSON(
  txt = "https://data.cityofnewyork.us/resource/qgea-i56i.json"
)
nyc_population <- jsonlite::fromJSON(
  txt = "https://data.cityofnewyork.us/resource/xywu-7bv9.json"
)

Data Cleansing

NYPD Arrests Dataset

View the number of columns and rows in the crime_data dataset.

# dim() returns c(rows, columns) for the raw pull.
dim(crime_data)
## [1] 1000   40

Now look at the data structure, and column names.

# str() also shows each column's type: apart from parks_nm (logical) and the
# nested lat_lon data frame, every field arrives as character.
str(crime_data)
## 'data.frame':    1000 obs. of  40 variables:
##  $ cmplnt_num                 : chr  "522575447" "403507361" "631420068" "995609899" ...
##  $ cmplnt_fr_dt               : chr  "2006-08-29T00:00:00.000" "2006-11-05T00:00:00.000" "2006-09-08T00:00:00.000" "2011-12-13T00:00:00.000" ...
##  $ cmplnt_fr_tm               : chr  "13:00:00" "11:00:00" "23:30:00" "18:40:00" ...
##  $ addr_pct_cd                : chr  "43" "66" "106" "79" ...
##  $ rpt_dt                     : chr  "2006-08-30T00:00:00.000" "2006-11-05T00:00:00.000" "2006-09-09T00:00:00.000" "2011-12-13T00:00:00.000" ...
##  $ ky_cd                      : chr  "578" "107" "347" "341" ...
##  $ ofns_desc                  : chr  "HARRASSMENT 2" "BURGLARY" "INTOXICATED & IMPAIRED DRIVING" "PETIT LARCENY" ...
##  $ pd_cd                      : chr  "638" "221" "905" "333" ...
##  $ pd_desc                    : chr  "HARASSMENT,SUBD 3,4,5" "BURGLARY,RESIDENCE,DAY" "INTOXICATED DRIVING,ALCOHOL" "LARCENY,PETIT FROM STORE-SHOPL" ...
##  $ crm_atpt_cptd_cd           : chr  "COMPLETED" "COMPLETED" "COMPLETED" "COMPLETED" ...
##  $ law_cat_cd                 : chr  "VIOLATION" "FELONY" "MISDEMEANOR" "MISDEMEANOR" ...
##  $ boro_nm                    : chr  "BRONX" "BROOKLYN" "QUEENS" "BROOKLYN" ...
##  $ loc_of_occur_desc          : chr  "INSIDE" "INSIDE" "FRONT OF" "INSIDE" ...
##  $ prem_typ_desc              : chr  "RESIDENCE - APT. HOUSE" "RESIDENCE - APT. HOUSE" "STREET" "CHAIN STORE" ...
##  $ juris_desc                 : chr  "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" ...
##  $ jurisdiction_code          : chr  "0" "0" "0" "0" ...
##  $ parks_nm                   : logi  NA NA NA NA NA NA ...
##  $ housing_psa                : chr  "NA" "NA" "NA" "NA" ...
##  $ x_coord_cd                 : chr  "1018029" "982556" "1028213" "1000788" ...
##  $ y_coord_cd                 : chr  "240747" "171385" "186786" "189718" ...
##  $ susp_race                  : chr  "UNKNOWN" NA NA NA ...
##  $ susp_sex                   : chr  "M" NA NA NA ...
##  $ latitude                   : chr  "40.827414051" "40.637096864" "40.679260229" "40.687401619" ...
##  $ longitude                  : chr  "-73.877945775" "-74.006105014" "-73.841500185" "-73.940369194" ...
##  $ lat_lon                    :'data.frame': 1000 obs. of  2 variables:
##   ..$ latitude : chr  "40.827414051" "40.637096864" "40.679260229" "40.687401619" ...
##   ..$ longitude: chr  "-73.877945775" "-74.006105014" "-73.841500185" "-73.940369194" ...
##  $ patrol_boro                : chr  "PATROL BORO BRONX" "PATROL BORO BKLYN SOUTH" "PATROL BORO QUEENS SOUTH" "PATROL BORO BKLYN NORTH" ...
##  $ vic_age_group              : chr  "25-44" "45-64" NA NA ...
##  $ vic_race                   : chr  "BLACK HISPANIC" "ASIAN / PACIFIC ISLANDER" "UNKNOWN" "UNKNOWN" ...
##  $ vic_sex                    : chr  "F" "F" "E" "D" ...
##  $ :@computed_region_efsh_h5xi: chr  "11610" "18180" "24013" "18181" ...
##  $ :@computed_region_f5dn_yrer: chr  "58" "2" "62" "69" ...
##  $ :@computed_region_yeji_bk3q: chr  "5" "2" "3" "2" ...
##  $ :@computed_region_92fq_4b7q: chr  "31" "7" "41" "49" ...
##  $ :@computed_region_sbqj_enih: chr  "26" "39" "64" "51" ...
##  $ cmplnt_to_dt               : chr  NA "2006-11-05T00:00:00.000" "2006-09-09T00:00:00.000" "2011-12-13T00:00:00.000" ...
##  $ cmplnt_to_tm               : chr  NA "17:40:00" "00:01:00" "18:49:00" ...
##  $ susp_age_group             : chr  NA NA NA NA ...
##  $ hadevelopt                 : chr  NA NA NA NA ...
##  $ transit_district           : chr  NA NA NA NA ...
##  $ station_name               : chr  NA NA NA NA ...

List the dataset column names so we can see which columns are useful for analysis purposes.

# Compact listing of the column names, used to pick the fields kept below.
colnames(crime_data)
##  [1] "cmplnt_num"                  "cmplnt_fr_dt"               
##  [3] "cmplnt_fr_tm"                "addr_pct_cd"                
##  [5] "rpt_dt"                      "ky_cd"                      
##  [7] "ofns_desc"                   "pd_cd"                      
##  [9] "pd_desc"                     "crm_atpt_cptd_cd"           
## [11] "law_cat_cd"                  "boro_nm"                    
## [13] "loc_of_occur_desc"           "prem_typ_desc"              
## [15] "juris_desc"                  "jurisdiction_code"          
## [17] "parks_nm"                    "housing_psa"                
## [19] "x_coord_cd"                  "y_coord_cd"                 
## [21] "susp_race"                   "susp_sex"                   
## [23] "latitude"                    "longitude"                  
## [25] "lat_lon"                     "patrol_boro"                
## [27] "vic_age_group"               "vic_race"                   
## [29] "vic_sex"                     ":@computed_region_efsh_h5xi"
## [31] ":@computed_region_f5dn_yrer" ":@computed_region_yeji_bk3q"
## [33] ":@computed_region_92fq_4b7q" ":@computed_region_sbqj_enih"
## [35] "cmplnt_to_dt"                "cmplnt_to_tm"               
## [37] "susp_age_group"              "hadevelopt"                 
## [39] "transit_district"            "station_name"

We don’t need all of the information in the crime_data dataset for our analysis, so we take what we need, and leave what we don’t.

Clean the crime_data dataset so that we can use it for our analysis.

# Extract the columns containing the data that we need. Each column is listed
# exactly once, and select() is namespace-qualified so another attached
# package cannot mask it.
crime_data_refined <- dplyr::select(
  crime_data,
  rpt_dt, ofns_desc, pd_desc, law_cat_cd, boro_nm, addr_pct_cd,
  latitude, longitude, lat_lon
)

# Count the missing values in each column before dropping incomplete rows.
# vapply() pins the result to exactly one integer per column.
vapply(crime_data_refined, function(column) sum(is.na(column)), integer(1))
##      rpt_dt   ofns_desc     pd_desc  law_cat_cd     boro_nm addr_pct_cd 
##           0           9           0           0           2           0 
##    latitude   longitude     lat_lon 
##           9           9          18
# Drop every row with a missing value in any selected column, then recount
# to confirm that no NAs remain.
clean_crime_data <- na.omit(crime_data_refined)
vapply(clean_crime_data, function(column) sum(is.na(column)), integer(1))
##      rpt_dt   ofns_desc     pd_desc  law_cat_cd     boro_nm addr_pct_cd 
##           0           0           0           0           0           0 
##    latitude   longitude     lat_lon 
##           0           0           0
# Every retained column is character, so summary() reports only
# length/class/mode rather than numeric statistics.
summary(clean_crime_data)
##     rpt_dt           ofns_desc           pd_desc         
##  Length:980         Length:980         Length:980        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##   law_cat_cd          boro_nm          addr_pct_cd       
##  Length:980         Length:980         Length:980        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##    latitude          longitude        
##  Length:980         Length:980        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##   lat_lon.latitude   lat_lon.longitude 
##  Length:980          Length:980        
##  Class :character    Class :character  
##  Mode  :character    Mode  :character

Look at a sample of the clean data.

# Draw 5 random rows; no seed is set in this script, so the rows shown
# differ from run to run.
sample_n(clean_crime_data, 5)
##                    rpt_dt                      ofns_desc
## 1 2010-06-01T00:00:00.000                  GRAND LARCENY
## 2 2009-07-27T00:00:00.000  OFF. AGNST PUB ORD SENSBLTY &
## 3 2006-08-18T00:00:00.000        MISCELLANEOUS PENAL LAW
## 4 2007-09-12T00:00:00.000                  HARRASSMENT 2
## 5 2006-11-25T00:00:00.000 CRIMINAL MISCHIEF & RELATED OF
##                                                  pd_desc  law_cat_cd
## 1 LARCENY,GRAND FROM BUILDING (NON-RESIDENCE) UNATTENDED      FELONY
## 2                                AGGRAVATED HARASSMENT 2 MISDEMEANOR
## 3                                    CRIMINAL CONTEMPT 1      FELONY
## 4                                  HARASSMENT,SUBD 3,4,5   VIOLATION
## 5                         MISCHIEF, CRIMINAL 4, OF MOTOR MISDEMEANOR
##     boro_nm addr_pct_cd     latitude     longitude lat_lon.latitude
## 1 MANHATTAN           1 40.703684905 -74.009667273     40.703684905
## 2 MANHATTAN          17 40.755516375 -73.970988622     40.755516375
## 3  BROOKLYN          84 40.691094994 -73.996074542     40.691094994
## 4     BRONX          42 40.832036889 -73.886736442     40.832036889
## 5  BROOKLYN          83  40.70685174 -73.919028249      40.70685174
##   lat_lon.longitude
## 1     -74.009667273
## 2     -73.970988622
## 3     -73.996074542
## 4     -73.886736442
## 5     -73.919028249

NYC Population Dataset

View the number of columns and rows in the nyc_population dataset.

# dim() returns c(rows, columns) for the population table.
dim(nyc_population)
## [1]  6 22

Look at the data structure, and column names.

# Worth noting in the output: the borough labels carry leading whitespace
# (e.g. "   Bronx") and every population figure is stored as character.
str(nyc_population)
## 'data.frame':    6 obs. of  22 variables:
##  $ age_group                    : chr  "Total Population" "Total Population" "Total Population" "Total Population" ...
##  $ borough                      : chr  "NYC Total" "   Bronx" "   Brooklyn" "   Manhattan" ...
##  $ _1950                        : chr  "7891957" "1451277" "2738175" "1960101" ...
##  $ _1950_boro_share_of_nyc_total: chr  "100.00" "18.39" "34.70" "24.84" ...
##  $ _1960                        : chr  "7781984" "1424815" "2627319" "1698281" ...
##  $ _1960_boro_share_of_nyc_total: chr  "100.00" "18.31" "33.76" "21.82" ...
##  $ _1970                        : chr  "7894862" "1471701" "2602012" "1539233" ...
##  $ _1970_boro_share_of_nyc_total: chr  "100.00" "18.64" "32.96" "19.50" ...
##  $ _1980                        : chr  "7071639" "1168972" "2230936" "1428285" ...
##  $ _1980_boro_share_of_nyc_total: chr  "100.00" "16.53" "31.55" "20.20" ...
##  $ _1990                        : chr  "7322564" "1203789" "2300664" "1487536" ...
##  $ _1990_boro_share_of_nyc_total: chr  "100.00" "16.44" "31.42" "20.31" ...
##  $ _2000                        : chr  "8008278" "1332650" "2465326" "1537195" ...
##  $ _2000_boro_share_of_nyc_total: chr  "100.00" "16.64" "30.78" "19.20" ...
##  $ _2010                        : chr  "8242624" "1385108" "2552911" "1585873" ...
##  $ _2010_boro_share_of_nyc_total: chr  "100.00" "16.80" "30.97" "19.24" ...
##  $ _2020                        : chr  "8550971" "1446788" "2648452" "1638281" ...
##  $ _2020_boro_share_of_nyc_total: chr  "100.00" "16.92" "30.97" "19.16" ...
##  $ _2030                        : chr  "8821027" "1518998" "2754009" "1676720" ...
##  $ _2030_boro_share_of_nyc_total: chr  "100.00" "17.22" "31.22" "19.01" ...
##  $ _2040                        : chr  "9025145" "1579245" "2840525" "1691617" ...
##  $ _2040_boro_share_of_nyc_total: chr  "100.00" "17.50" "31.47" "18.74" ...

List the dataset column names.

# The year columns start with an underscore (e.g. "_2020"), so they must be
# quoted when referenced.
colnames(nyc_population)
##  [1] "age_group"                     "borough"                      
##  [3] "_1950"                         "_1950_boro_share_of_nyc_total"
##  [5] "_1960"                         "_1960_boro_share_of_nyc_total"
##  [7] "_1970"                         "_1970_boro_share_of_nyc_total"
##  [9] "_1980"                         "_1980_boro_share_of_nyc_total"
## [11] "_1990"                         "_1990_boro_share_of_nyc_total"
## [13] "_2000"                         "_2000_boro_share_of_nyc_total"
## [15] "_2010"                         "_2010_boro_share_of_nyc_total"
## [17] "_2020"                         "_2020_boro_share_of_nyc_total"
## [19] "_2030"                         "_2030_boro_share_of_nyc_total"
## [21] "_2040"                         "_2040_boro_share_of_nyc_total"

Clean the nyc_population dataset.

# Keep just the two columns the analysis uses: the borough label and the
# figure stored in the "_2020" column.
nyc_population_refined <- nyc_population %>%
  select(borough, "_2020")

# Count the missing values in each column before dropping incomplete rows.
# vapply() pins the result to exactly one integer per column.
vapply(nyc_population_refined, function(column) sum(is.na(column)), integer(1))
## borough   _2020 
##       0       0
# Drop any row with a missing value, then recount to confirm none remain.
clean_pop_data <- na.omit(nyc_population_refined)
vapply(clean_pop_data, function(column) sum(is.na(column)), integer(1))
## borough   _2020 
##       0       0
# Get rid of the NYC Total population row as we don't need it. The row is
# matched by its label rather than by position, so a reordered response
# cannot drop a borough by mistake. trimws() is applied for the comparison
# because the borough labels in this dataset carry leading whitespace.
is_city_total <- trimws(clean_pop_data$borough) == "NYC Total"
clean_pop_data <- clean_pop_data[!is_city_total, ]

# Rename the columns.
colnames(clean_pop_data) <- c("Borough", "Population")

# The table has only 5 rows at this point, so this prints all of them in a
# random order.
sample_n(clean_pop_data, 5)
##            Borough Population
## 1           Queens    2330295
## 2         Brooklyn    2648452
## 3    Staten Island     487155
## 4            Bronx    1446788
## 5        Manhattan    1638281

Data Analysis

Crime Frequency by Borough

# Tally how many cleaned records fall in each borough; these counts are the
# numerators for the per-borough rate computed later.
crimes_per_borough <- table(clean_crime_data[["boro_nm"]])
head(crimes_per_borough)
## 
##         BRONX      BROOKLYN     MANHATTAN        QUEENS STATEN ISLAND 
##           211           308           233           176            52

Create a new dataframe that contains a population column for each borough.

crimes_dataframe <- as.data.frame(crimes_per_borough, stringsAsFactors = FALSE)
population_dataframe <- as.data.frame(clean_pop_data, stringsAsFactors = FALSE)

# Add column names to the crimes_dataframe.
colnames(crimes_dataframe) <- c("Borough", "Crimes")

# Normalise the join key on both sides: the crime data uses upper-case
# borough names, and the population data pads its labels with leading
# whitespace. Without trimming, no key matches and Crimes comes back as NA.
crimes_dataframe$Borough <- str_to_title(crimes_dataframe$Borough)
population_dataframe$Borough <- trimws(population_dataframe$Borough)

# Join the 2 tables together. type = "right" keeps every borough in the
# population table; Crimes is filled in by the key match itself, so it no
# longer depends on both tables happening to share the same row order.
crimes_population <- plyr::join(
  crimes_dataframe,
  population_dataframe,
  by = "Borough",
  type = "right"
)

# Render the joined table as striped, full-width HTML with a bold first
# column. TRUE/FALSE are spelled out because T and F can be reassigned.
kable(crimes_population, "html", escape = FALSE) %>%
  kable_styling("striped", full_width = TRUE) %>%
  column_spec(1, bold = TRUE)
Borough Crimes Population
Bronx 211 1446788
Brooklyn 308 2648452
Manhattan 233 1638281
Queens 176 2330295
Staten Island 52 487155

Calculate the crime rate for each Borough, and add a CrimeRate column to the crimes_population dataframe.

# Rate = crimes per 100,000 residents. Both columns go through as.numeric()
# because Population was read in as character.
crime_rate <- as.numeric(crimes_population$Crimes) /
  as.numeric(crimes_population$Population) * 100000

# Round the numeric rate directly; formatting it as text first and parsing
# it back adds nothing and can only lose precision.
crimes_population$CrimeRate <- round(crime_rate, 2)

kable(crimes_population, "html", escape = FALSE) %>%
  kable_styling("striped", full_width = TRUE) %>%
  column_spec(1, bold = TRUE)
Borough Crimes Population CrimeRate
Bronx 211 1446788 14.58
Brooklyn 308 2648452 11.63
Manhattan 233 1638281 14.22
Queens 176 2330295 7.55
Staten Island 52 487155 10.67

Plot a bar graph displaying the crime rates for each borough.

# Bar chart of CrimeRate per borough, one filled bar per borough.
# geom_col() draws bars whose heights are the y values as given.
crime_by_borough_plot <- ggplot(
  crimes_population,
  aes(x = Borough, y = CrimeRate, fill = Borough)
) +
  geom_col() +
  labs(title = "Crime Rates by Borough", x = "Borough", y = "Crime Rate")

crime_by_borough_plot

New York City crime by category

# Tabulate law_cat_cd across the cleaned records, most frequent first, and
# attach each category's share of the total as a percentage.
cat_nyc <- clean_crime_data %>% select(law_cat_cd)

crime_category_table <- sort(table(cat_nyc$law_cat_cd), decreasing = TRUE)

crime_by_category <- setNames(
  as.data.frame(crime_category_table, stringsAsFactors = FALSE),
  c("Category", "Frequency")
)
crime_by_category$Percentage <- prop.table(crime_by_category$Frequency) * 100
crime_by_category
##      Category Frequency Percentage
## 1 MISDEMEANOR       572   58.36735
## 2      FELONY       291   29.69388
## 3   VIOLATION       117   11.93878

Bar graph displaying arrest frequency by crime category in NYC.

# Bar chart of record counts per category, one filled bar per category.
crime_by_category_plot <- ggplot(
  crime_by_category,
  aes(x = Category, y = Frequency, fill = Category)
) +
  geom_col() +
  labs(title = "Crime by Category", x = "Category", y = "Frequency")

crime_by_category_plot

Pie chart displaying arrest frequency by crime category in NYC.

# Pie chart: a single stacked bar (constant x) wrapped onto polar
# coordinates, so each category's Frequency becomes a slice angle.
# NOTE(review): the x axis title is specified twice ("" via the scale and
# "Category" via labs) - confirm which one is intended.
crime_category_pie_chart <- ggplot(
  crime_by_category,
  aes(x = "", y = Frequency, fill = Category)
) +
  geom_col() +
  coord_polar(theta = "y") +
  scale_x_discrete("") +
  labs(title = "Crime by Category", x = "Category", y = "Frequency")

crime_category_pie_chart

Over half of all arrests in NYC are for misdemeanor offences (crimes such as petty theft, disorderly conduct, public intoxication, assault, etc.). About half as many felony arrests were made (crimes such as murder, burglary, arson, etc.), and the remainder of arrests were for violations (disorderly conduct, loitering, etc.). The low number of violation arrests is not surprising, as these are low-level offences which rarely result in arrest.

These statistics make sense: felonies are less common crimes; misdemeanors are less serious but often result in arrest; and violations rarely lead to arrest.

Crimes Leading to The Least Arrests in NYC

# Offence descriptions that occur fewer than this many times count as rare.
rare_crime_cutoff <- 10

# Tabulate pd_desc, most frequent first, and keep only the rare entries.
least_crimes <- sort(table(clean_crime_data$pd_desc), decreasing = TRUE)
least_crimes <- data.frame(least_crimes[least_crimes < rare_crime_cutoff])
colnames(least_crimes) <- c("Crime", "Frequency")

# NOTE(review): the denominator is the total of the rare offences only, so
# Percentage is each offence's share of this filtered table, not of all
# records - confirm that this is the intended meaning.
least_crimes$Percentage <- least_crimes$Frequency / sum(least_crimes$Frequency) * 100

kable(least_crimes, "html", escape = FALSE) %>%
  kable_styling("striped", full_width = TRUE) %>%
  column_spec(1, bold = TRUE)
Crime Frequency Percentage
FORGERY,ETC.-MISD. 9 4.109589
FRAUD,UNCLASSIFIED-FELONY 8 3.652968
LARCENY,GRAND BY THEFT OF CREDIT CARD 8 3.652968
LEAVING SCENE-ACCIDENT-PERSONA 8 3.652968
MISCHIEF, CRIMINAL 3 & 2, OF M 8 3.652968
FORGERY,ETC.,UNCLASSIFIED-FELO 7 3.196347
FRAUD,UNCLASSIFIED-MISDEMEANOR 6 2.739726
VIOLATION OF ORDER OF PROTECTI 6 2.739726
BURGLARY,COMMERCIAL,NIGHT 5 2.283105
CONTROLLED SUBSTANCE,POSSESS. 5 2.283105
LARCENY,GRAND BY BANK ACCT COMPROMISE-REPRODUCED CHECK 5 2.283105
MISCHIEF,CRIMINAL, UNCL 2ND 5 2.283105
RESISTING ARREST 5 2.283105
BRIBERY,PUBLIC ADMINISTRATION 4 1.826484
BURGLARY,RESIDENCE,NIGHT 4 1.826484
LARCENY,PETIT OF VEHICLE ACCES 4 1.826484
MARIJUANA, POSSESSION 4 1.826484
ROBBERY,PUBLIC PLACE INSIDE 4 1.826484
SEXUAL ABUSE 3,2 4 1.826484
THEFT OF SERVICES, UNCLASSIFIE 4 1.826484
AGGRAVATED HARASSMENT 1 3 1.369863
ARSON, MOTOR VEHICLE 1 2 3 & 4 3 1.369863
BURGLARY,RESIDENCE,UNKNOWN TIM 3 1.369863
CHILD, ENDANGERING WELFARE 3 1.369863
LARCENY,GRAND BY ACQUIRING LOS 3 1.369863
LARCENY,GRAND OF MOTORCYCLE 3 1.369863
MARIJUANA, SALE 4 & 5 3 1.369863
RAPE 1 3 1.369863
STOLEN PROPERTY 3,POSSESSION 3 1.369863
TRAFFIC,UNCLASSIFIED MISDEMEAN 3 1.369863
WEAPONS POSSESSION 3 3 1.369863
ADM.CODE,UNCLASSIFIED MISDEMEA 2 0.913242
ARSON 2,3,4 2 0.913242
ASSAULT POLICE/PEACE OFFICER 2 0.913242
CONTEMPT,CRIMINAL 2 0.913242
FORGERY,M.V. REGISTRATION 2 0.913242
LARCENY,GRAND PERSON,NECK CHAI 2 0.913242
LARCENY,PETIT BY ACQUIRING LOS 2 0.913242
LARCENY,PETIT BY DISHONEST EMP 2 0.913242
LARCENY,PETIT OF BICYCLE 2 0.913242
NY STATE LAWS,UNCLASSIFIED FEL 2 0.913242
PUBLIC ADMINISTATION,UNCLASS M 2 0.913242
PUBLIC ADMINISTRATION,UNCLASSI 2 0.913242
RECKLESS ENDANGERMENT 1 2 0.913242
ROBBERY,COMMERCIAL UNCLASSIFIED 2 0.913242
ROBBERY,DWELLING 2 0.913242
STRANGULATION 1ST 2 0.913242
UNAUTHORIZED USE VEHICLE 2 2 0.913242
WEAPONS POSSESSION 1 & 2 2 0.913242
AGGRAVATED SEXUAL ASBUSE 1 0.456621
BAIL JUMPING 1 & 2 1 0.456621
BURGLARY,COMMERCIAL,DAY 1 0.456621
BURGLARY,COMMERCIAL,UNKNOWN TI 1 0.456621
CONTROLLED SUBSTANCE, INTENT T 1 0.456621
CONTROLLED SUBSTANCE,INTENT TO 1 0.456621
CONTROLLED SUBSTANCE,SALE 3 1 0.456621
FACILITATION 4, CRIMINAL 1 0.456621
FORGERY,DRIVERS LICENSE 1 0.456621
IMPRISONMENT 2,UNLAWFUL 1 0.456621
LARCENY, GRAND OF AUTO - ATTEM 1 0.456621
LARCENY,GRAND BY FALSE PROMISE 1 0.456621
LARCENY,GRAND FROM PERSON,PURS 1 0.456621
LARCENY,GRAND FROM STORE-SHOPL 1 0.456621
LARCENY,GRAND OF TRUCK 1 0.456621
LARCENY,GRAND OF VEHICULAR/MOTORCYCLE ACCESSORIES 1 0.456621
LARCENY,PETIT BY CHECK USE 1 0.456621
LARCENY,PETIT BY CREDIT CARD U 1 0.456621
LARCENY,PETIT BY FALSE PROMISE 1 0.456621
LARCENY,PETIT FROM TRUCK 1 0.456621
MARIJUANA, POSSESSION 1, 2 & 3 1 0.456621
MENACING 1ST DEGREE (VICT PEAC 1 0.456621
MISCHIEF, CRIMINAL 3&2, BY FIR 1 0.456621
MISCHIEF, CRIMINAL 4, BY FIRE 1 0.456621
OBSTR BREATH/CIRCUL 1 0.456621
PETIT LARCENY-CHECK FROM MAILB 1 0.456621
RAPE 2 1 0.456621
RECKLESS DRIVING 1 0.456621
RECKLESS ENDANGERMENT 2 1 0.456621
ROBBERY,NECKCHAIN/JEWELRY 1 0.456621
ROBBERY,ON BUS/ OR BUS DRIVER 1 0.456621
ROBBERY,POCKETBOOK/CARRIED BAG 1 0.456621
ROBBERY,RESIDENTIAL COMMON AREA 1 0.456621
SODOMY 1 1 0.456621
STOLEN PROPERTY 2,1,POSSESSION 1 0.456621
TRESPASS 1,CRIMINAL 1 0.456621
UNAUTHORIZED USE VEHICLE 3 1 0.456621

Finally, I wanted to look at the crimes that lead to the fewest arrests in NYC. It is interesting that these are all low level crimes. This would suggest that they are crimes that people rarely get arrested for, or that they are so low level that people rarely report these crimes.

Conclusion

The number of arrests for low level crimes is consistently low throughout this report. This would suggest that such crimes often go unreported, or they are so low level that they do not warrant arrest. The fact that misdemeanors lead to the most arrests is of no surprise. Such crimes are low level, but they lead to arrest. The fact that they are low level probably means people are more likely to commit these crimes with little effort to hide their actions.

When it comes to safe boroughs in New York, it appears that Queens is the safest borough, followed by Staten Island. The most dangerous borough is the Bronx, followed by Manhattan.

I would not have expected Manhattan to have a high crime rate, so given more time, this would have been an interesting point to explore further. Either there is an interesting reason for this lurking below the surface, or the data is somehow skewed.