1. Introduction

Minneapolis is known across the United States for its high quality of life and for its striking urban nature along the Mississippi River. However, it is also infamous for the death of George Floyd in May 2020. This analysis examines the trend of police incidents in Minneapolis since 2020 to see whether there were any changes in safety before and after Mr. Floyd’s death.




suppressPackageStartupMessages({
library("tidyverse")
library("leaflet")
library("zoo")
library("ggthemes")
library("knitr")
library("shiny")
})



# Load the yearly police-incident CSV exports (2020 and 2021) as tibbles
incident_2020 <- read_csv(file = "Police_Incidents_2020.csv")
incident_2021 <- read_csv(file = "Police_Incidents_2021.csv")

Inspecting the Datasets

Summary

# Per-column summary statistics of the 2020 table
incident_2020 %>% summary()
## Warning: One or more parsing issues, see `problems()` for details
##        X                Y         publicaddress       caseNumber       
##  Min.   :-93.33   Min.   : 0.00   Length:24136       Length:24136      
##  1st Qu.:-93.29   1st Qu.:44.95   Class :character   Class :character  
##  Median :-93.27   Median :44.96   Mode  :character   Mode  :character  
##  Mean   :-93.23   Mean   :44.95                                        
##  3rd Qu.:-93.25   3rd Qu.:44.99                                        
##  Max.   :  0.00   Max.   :45.05                                        
##                                                                        
##     precinct     reportedDate        reportedTime   beginDate        
##  Min.   :1.000   Length:24136       Min.   :   0   Length:24136      
##  1st Qu.:2.000   Class :character   1st Qu.: 954   Class :character  
##  Median :3.000   Mode  :character   Median :1342   Mode  :character  
##  Mean   :3.147                      Mean   :1330                     
##  3rd Qu.:4.000                      3rd Qu.:1753                     
##  Max.   :5.000                      Max.   :2359                     
##  NA's   :27                                                          
##  reportedDateTime     beginTime        offense          description       
##  Length:24136       Min.   :   0.0   Length:24136       Length:24136      
##  Class :character   1st Qu.: 748.8   Class :character   Class :character  
##  Mode  :character   Median :1440.0   Mode  :character   Mode  :character  
##                     Mean   :1309.9                                        
##                     3rd Qu.:1916.0                                        
##                     Max.   :2359.0                                        
##                                                                           
##    UCRCode          enteredDate         centergbsid      centerLong    
##  Length:24136       Length:24136       Min.   :    0   Min.   :-93.33  
##  Class :character   Class :character   1st Qu.:14675   1st Qu.:-93.29  
##  Mode  :character   Mode  :character   Median :17199   Median :-93.27  
##                                        Mean   :17141   Mean   :-93.23  
##                                        3rd Qu.:19960   3rd Qu.:-93.25  
##                                        Max.   :26053   Max.   :  0.00  
##                                        NA's   :3128                    
##    centerLat        centerX             centerY        neighborhood      
##  Min.   : 0.00   Min.   :-10389348   Min.   :      0   Length:24136      
##  1st Qu.:44.95   1st Qu.:-10384793   1st Qu.:5613257   Class :character  
##  Median :44.96   Median :-10382724   Median :5615830   Mode  :character  
##  Mean   :44.95   Mean   :-10378169   Mean   :5613921                     
##  3rd Qu.:44.99   3rd Qu.:-10380246   3rd Qu.:5619479                     
##  Max.   :45.05   Max.   :        0   Max.   :5629589                     
##                                                                          
##  lastchanged        LastUpdateDateETL     OBJECTID    
##  Length:24136       Length:24136       Min.   :    1  
##  Class :character   Class :character   1st Qu.: 6035  
##  Mode  :character   Mode  :character   Median :12068  
##                                        Mean   :12068  
##                                        3rd Qu.:18102  
##                                        Max.   :24136  
## 

Structure

# Column names, types and first values of the 2020 table
incident_2020 %>% glimpse()
## Rows: 24,136
## Columns: 23
## $ X                 <dbl> -93.27970, -93.28882, -93.24985, -93.28135, -93.2652~
## $ Y                 <dbl> 44.94383, 44.96495, 44.92242, 44.96773, 44.90978, 44~
## $ publicaddress     <chr> "0032XX BLAISDELL AVE", "0019XX HENNEPIN AVE", "0044~
## $ caseNumber        <chr> "MP202068067", "MP2020701518", "MP2020701520", "MP20~
## $ precinct          <dbl> 5, 5, 3, 1, 3, 5, 2, 3, 5, 1, 2, 4, 2, 2, 1, 2, 5, 4~
## $ reportedDate      <chr> "2020/03/13 00:00:00+00", "2020/03/20 00:00:00+00", ~
## $ reportedTime      <dbl> 2333, 1020, 1146, 1242, 1249, 1335, 1411, 1700, 1729~
## $ beginDate         <chr> "2020/03/13 00:00:00+00", "2020/03/11 00:00:00+00", ~
## $ reportedDateTime  <chr> "2020/03/13 23:33:00+00", "2020/03/20 10:20:00+00", ~
## $ beginTime         <dbl> 2055, 502, 1730, 1, 2000, 1700, 2100, 2230, 1500, 23~
## $ offense           <chr> "THEFT", "TMVP", "TMVP", "TFMV", "TMVP", "TMVP", "TM~
## $ description       <chr> "OTHER THEFT", "THEFT-MOTR VEH PARTS", "THEFT-MOTR V~
## $ UCRCode           <chr> "07", "07", "07", "07", "07", "07", "07", "07", "07"~
## $ enteredDate       <chr> "2020/03/17 00:00:00+00", "2020/03/21 00:00:00+00", ~
## $ centergbsid       <dbl> 14077, 21641, 19468, 15054, 18637, 20532, 11077, NA,~
## $ centerLong        <dbl> -93.27971, -93.28882, -93.24986, -93.28135, -93.2652~
## $ centerLat         <dbl> 44.94383, 44.96496, 44.92242, 44.96773, 44.90978, 44~
## $ centerX           <dbl> -10383849, -10384864, -10380526, -10384032, -1038223~
## $ centerY           <dbl> 5612683, 5616006, 5609316, 5616442, 5607329, 5608157~
## $ neighborhood      <chr> "Lyndale", "Lowry Hill East", "Northrop", "Loring Pa~
## $ lastchanged       <chr> "2020/03/21 00:00:00+00", "2020/03/21 00:00:00+00", ~
## $ LastUpdateDateETL <chr> "2020/07/15 07:00:22+00", "2020/07/15 07:00:22+00", ~
## $ OBJECTID          <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1~




Check the Missing Data

  • Drop the 11 rows that do not contain a geolocation
  • Keep the rows that are missing a public address or neighborhood
# Cast `precinct` to character in both yearly tables before stacking them
# (presumably the two files parse this column as different types -- confirm)
incident_2020 <- incident_2020 %>% mutate(precinct = as.character(precinct))
incident_2021 <- incident_2021 %>% mutate(precinct = as.character(precinct))
incident_raw <- bind_rows(incident_2020, incident_2021)


# Keep only the columns used by the analysis below
incident_selected <- select(
  incident_raw,
  publicaddress, reportedDateTime, description,
  centerLong, centerLat, neighborhood, offense
)

# Parse reportedDateTime from its first 16 characters ("YYYY/MM/DD HH:MM")
# and derive a "YYYY/MM" month key in reportedDate
incident_selected <- incident_selected %>%
  mutate(
    reportedDateTime = parse_datetime(
      substr(reportedDateTime, 1, 16),
      "%Y/%m/%d %H:%M"
    ),
    reportedDate = format(reportedDateTime, "%Y/%m")
  )

# Drop rows whose longitude is exactly 0, then count the zeros left in each
# column (columns that contain NA report NA)
incident_selected <- incident_selected[incident_selected[["centerLong"]] != 0, ]
colSums(incident_selected == 0)
##    publicaddress reportedDateTime      description       centerLong 
##               NA                0                0                0 
##        centerLat     neighborhood          offense     reportedDate 
##                0               NA                0                0




2. Analysis

Crime Mapping with Leaflet

  • Plots 60k+ police incidents on an OpenStreetMap-based map of Minneapolis
  • The map is interactive: you can zoom in and out
  • Details of each incident are available once you zoom all the way in
# Interactive cluster map of every incident: hover shows the description,
# click opens a popup with neighborhood, address and report time
leaflet(data = incident_selected) %>%
  addProviderTiles(provider = providers$Stamen.TonerLite) %>%
  addMarkers(
    lng = ~centerLong,
    lat = ~centerLat,
    label = ~description,
    popup = ~paste(
      "<b>", "Neighborhood:", "</b>", neighborhood, "<br>",
      "<b>", "Address:", "</b>", publicaddress, "<br>",
      "<b>", "Time:", "</b>", reportedDateTime
    ),
    clusterOptions = markerClusterOptions()
  ) %>%
  addMiniMap(tiles = providers$Stamen.TonerLite, toggleDisplay = TRUE)




Trend of Police Incidents Since 2020

  • The annotation in the graph marks the death of George Floyd in May 2020
  • The total number of police incidents seems to have increased since May 2020
  • Since the start of 2021, the number of incidents has been similar to the previous year on a year-over-year basis
# Monthly incident counts, excluding the month "2021/05"
summarised_incident_selected <- incident_selected %>%
  filter(reportedDate != "2021/05") %>%
  count(reportedDate, name = "num_case")

# Bar chart of incidents per month, with a marker and label placed at
# x = "2020/05", y = 2000
ggplot(
  summarised_incident_selected,
  aes(x = reportedDate, y = num_case, group = 1)
) +
  geom_col(fill = "red") +
  expand_limits(y = 0) +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 0.5)) +
  labs(x = "", y = "") +
  annotate(
    "point",
    x = "2020/05", y = 2000,
    colour = "blue", size = 3, shape = 8
  ) +
  annotate(
    "text",
    x = "2020/05", y = 2000,
    label = "Death of G.F", colour = "blue", vjust = -1
  )




Top 10 Neighborhoods with the Most Police Incidents

  • In addition to the previous plot, the graph below shows the 10 neighborhoods where the most police incidents occurred
  • Downtown West, Whittier, and Marcy Holmes were the top 3 neighborhoods
# The 10 neighborhoods with the most incidents (ties kept), with the
# neighborhood factor ordered by count so the plot is sorted
top_10_neighbor <- incident_selected %>%
  count(neighborhood, sort = TRUE) %>%
  slice_max(n, n = 10) %>%
  mutate(neighborhood = fct_reorder(neighborhood, n))


# Lollipop chart: one labelled dot per neighborhood, with a segment drawn from
# the dot back to x = 30.
# The former `scale_color_gradientn(colors = palette)` layer was removed:
# no colour aesthetic is mapped, and `palette` is not defined in this script.
ggplot(top_10_neighbor, aes(n, neighborhood)) +
  geom_point(size = 12) +
  geom_segment(aes(xend = 30, yend = neighborhood), size = 2) +
  geom_text(aes(label = n), color = "white", size = 4) +
  #scale_x_continuous("", expand = c(0,0), limits = c(30,90), position = "top") +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text = element_text(color = "black"),
    axis.title = element_blank(),
    legend.position = "none"
  )




Police Incidents Near Univ. Minnesota

# Neighborhoods treated as "near the University of Minnesota"
univ_neighbor <- c(
  "Cedar Riverside",
  "Como",
  "University of Minnesota",
  "Prospect Park - East River Road",
  "Marcy Holmes"
)

# Monthly incident counts per university-area neighborhood, excluding the
# month "2021/05".
# `%in%` tests set membership. The previous `==` recycled the 5-element
# vector against the rows position by position, silently dropping most of
# the matching incidents.
near_univ_neighbor <- incident_selected %>%
  filter(
    neighborhood %in% univ_neighbor,
    reportedDate != "2021/05"
  ) %>%
  count(reportedDate, neighborhood)

# Stacked bar chart of monthly incidents, filled by neighborhood
ggplot(near_univ_neighbor, aes(reportedDate, n, fill = neighborhood)) +
  geom_col() +
  xlab("") +
  ylab("") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

  • The University of Minnesota is located at the heart of Minneapolis, surrounded by 5 neighborhoods
  • Many university students find it hard to find a safe place to live
  • Marcy Holmes, where Dinkytown is located, might not be a good choice in terms of police incidents




3. Conclusion