This project aims to explore the crime rate in New York City for the current year. The dataset used in this project is provided by the New York Police Department and is available at the link below.
NYPD Complaint Data Current (Year To Date) link: https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243
Data Last Updated: October 24, 2018 Data Provided by: Police Department (NYPD)
This is a breakdown of every criminal complaint report filed in NYC by the NYPD for the current calendar year. This data is manually extracted every quarter and reviewed by the Office of Management Analysis and Planning. Each record represents a criminal complaint in NYC and includes information about the type of crime, the location and time of enforcement. In addition, information related to victim and suspect demographics is also included.
if (!require('ggplot2')) install.packages('ggplot2')
if (!require('dplyr')) install.packages('dplyr')
if (!require('leaflet')) install.packages('leaflet')
if (!require('scales')) install.packages('scales')
if (!require('readr')) install.packages('readr')
if (!require('ggmap')) install.packages('ggmap')
if(!require('ggrepel')) install.packages("ggrepel")
library(lubridate)
library(stringr)
Load the data using readr and read_csv().
# Import data ----
# Location of the NYPD complaint export on the author's machine.
path <- "C:\\Users\\patel\\Desktop\\SPS\\SPS_DATA_607\\final_project\\NYPD_Complaint_Data_Current__Year_To_Date_.csv"
df <- read_csv(path)

# Preview: keep only the first 100 rows and show the complaint time as text.
df_sub <- df[seq_len(100), ]
df_sub[["CMPLNT_FR_TM"]] <- as.character(df_sub[["CMPLNT_FR_TM"]])
df_sub

# Report the total row count with a thousands separator.
sprintf("Number of Rows in Dataframe: %s", format(nrow(df), big.mark = ","))
## [1] "Number of Rows in Dataframe: 228,905"
The All-Caps text is difficult to read. Let’s force the text in the appropriate columns into proper case.
#' Convert all-caps words to proper case
#'
#' Every run of two or more consecutive capital letters that starts at a word
#' boundary is rewritten so that its first letter stays upper case and the
#' remaining letters become lower case. Single capital letters and text that
#' is not upper case are left unchanged.
#'
#' @param x A vector of strings to convert.
#' @return A character vector the same length as `x`.
proper_case <- function(x) {
  # Group 1 is the leading capital, group 2 the capitals that follow it.
  caps_word <- "\\b([A-Z])([A-Z]+)"
  # \U / \L are Perl case-conversion escapes, hence perl = TRUE below.
  title_form <- "\\U\\1\\L\\2"
  gsub(caps_word, title_form, x, perl = TRUE)
}
library(dplyr)
# Normalise the all-caps descriptive columns to proper case and store the
# complaint time as text.
df <- df %>%
  mutate(
    across(
      all_of(c(
        "BORO_NM", "JURIS_DESC", "LAW_CAT_CD", "LOC_OF_OCCUR_DESC",
        "OFNS_DESC", "PARKS_NM", "PATROL_BORO", "PD_DESC", "PREM_TYP_DESC"
      )),
      proper_case
    ),
    CMPLNT_FR_TM = as.character(CMPLNT_FR_TM)
  )

# Preview the first 100 rows of the cleaned data.
df_sub <- df[seq_len(100), ]
df_sub
# Build a copy of df whose complaint start date is a Date (parsed as
# month/day/year) and which carries an extra labelled weekday column.
data_dayOfWeek <- df %>%
  mutate(
    CMPLNT_FR_DT = as.Date(CMPLNT_FR_DT, format = "%m/%d/%Y"),
    # Computed from the already-converted date above.
    day_of_week = wday(CMPLNT_FR_DT, label = TRUE)
  )
Display crime incident locations on the map using leaflet. Click icons on the map to show incident details.
# Map the first 30,000 complaints. head() returns fewer rows when df is
# shorter, instead of padding the result with all-NA rows.
data <- head(df, 30000)

# df itself has no weekday column (the weekday is only added to the separate
# data_dayOfWeek copy), so derive it here; otherwise the "Day of week" entry
# in every popup is empty.
# Dates are parsed as month/day/year, the same format used for data_dayOfWeek.
data$DayOfWeek <- wday(
  as.Date(data$CMPLNT_FR_DT, format = "%m/%d/%Y"),
  label = TRUE
)

# HTML shown when a marker is clicked: one "<b>label: </b>value" entry per
# field, separated by line breaks.
data$popup <- paste(
  "<b>Incident #: </b>", data$CMPLNT_NUM,
  "<br>", "<b>Category: </b>", data$LAW_CAT_CD,
  "<br>", "<b>Offence Description: </b>", data$OFNS_DESC,
  "<br>", "<b>Day of week: </b>", data$DayOfWeek,
  "<br>", "<b>Date: </b>", data$CMPLNT_FR_DT,
  "<br>", "<b>Time: </b>", data$CMPLNT_FR_TM,
  "<br>", "<b>PD Case: </b>", data$PD_CD,
  "<br>", "<b>PD Description: </b>", data$PD_DESC,
  "<br>", "<b>Longitude: </b>", data$Longitude,
  "<br>", "<b>Latitude: </b>", data$Latitude
)
## Warning: Unknown or uninitialised column: 'DayOfWeek'.
# Interactive map of the complaints in `data`: clustered markers on an
# OpenStreetMap base layer, each with its HTML popup.
leaflet(data, width = "100%") %>%
  # A single OSM layer; adding it twice only stacks a duplicate tile layer.
  addTiles(group = "OSM (default)") %>%
  # Optional base maps. When re-enabling one, also add its group name to
  # baseGroups below so the layer control can switch to it.
  # addProviderTiles(provider = "Esri.WorldStreetMap", group = "World StreetMap") %>%
  # addProviderTiles(provider = "Esri.WorldImagery", group = "World Imagery") %>%
  # addProviderTiles(provider = "NASAGIBS.ViirsEarthAtNight2012", group = "Nighttime Imagery") %>%
  addMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    popup = ~popup,
    clusterOptions = markerClusterOptions()
  ) %>%
  addLayersControl(
    # List only groups that have a layer on the map; a name without a layer
    # shows up as a selectable entry that displays nothing.
    baseGroups = c("OSM (default)"),
    options = layersControlOptions(collapsed = FALSE)
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with
## either missing or invalid lat/lon values and will be ignored