Loading packages and libraries:

# Pin a CRAN mirror so package installation also works in non-interactive
# sessions (e.g. when knitting), where R cannot prompt for a mirror.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this analysis depends on.
required_packages <- c(
  "dplyr", "tidyverse", "lubridate", "ggplot2",
  "leaflet", "readr", "sf", "htmlwidgets"
)

# Install only what is missing. Reinstalling a package that is already
# installed and loaded fails on Windows with "Permission denied" on the
# package DLL and needlessly re-downloads binaries on every run.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
## Installing packages into 'C:/Users/zahid/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## Warning: package 'tiydverse' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zahid\AppData\Local\R\win-library\4.4\00LOCK\dplyr\libs\x64\dplyr.dll
## to C:\Users\zahid\AppData\Local\R\win-library\4.4\dplyr\libs\x64\dplyr.dll:
## Permission denied
## Warning: restored 'dplyr'
## package 'lubridate' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'lubridate'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zahid\AppData\Local\R\win-library\4.4\00LOCK\lubridate\libs\x64\lubridate.dll
## to
## C:\Users\zahid\AppData\Local\R\win-library\4.4\lubridate\libs\x64\lubridate.dll:
## Permission denied
## Warning: restored 'lubridate'
## package 'ggplot2' successfully unpacked and MD5 sums checked
## package 'leaflet' successfully unpacked and MD5 sums checked
## package 'readr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zahid\AppData\Local\R\win-library\4.4\00LOCK\readr\libs\x64\readr.dll
## to C:\Users\zahid\AppData\Local\R\win-library\4.4\readr\libs\x64\readr.dll:
## Permission denied
## Warning: restored 'readr'
## package 'sf' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'sf'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zahid\AppData\Local\R\win-library\4.4\00LOCK\sf\libs\x64\sf.dll to
## C:\Users\zahid\AppData\Local\R\win-library\4.4\sf\libs\x64\sf.dll: Permission
## denied
## Warning: restored 'sf'
## package 'htmlwidgets' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\zahid\AppData\Local\Temp\RtmpugLKfb\downloaded_packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(leaflet)
library(readr)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(htmlwidgets)

Loading the dataset:

# Download the 2024 NYPD complaint extract straight from GitHub into a
# base data.frame.
crime_data2024 <- read.csv(
  file = "https://media.githubusercontent.com/media/zahid607/Final-Project/refs/heads/main/NYPD_Complaint_Data_2024.csv"
)

# Peek at the first six records to confirm the download parsed as expected.
head(crime_data2024, n = 6L)
##   CMPLNT_NUM CMPLNT_FR_DT CMPLNT_FR_TM CMPLNT_TO_DT CMPLNT_TO_TM ADDR_PCT_CD
## 1  298685380   12/30/2024     18:58:00   12/30/2024     19:00:00         111
## 2  298690828   12/31/2024     13:00:00   12/31/2024     13:10:00         113
## 3  298762768   12/30/2024     19:00:00   12/31/2024      7:00:00         111
## 4  298704506   12/28/2024     19:00:00   12/28/2024     19:15:00         101
## 5  298672727   12/31/2024      3:15:00   12/31/2024      3:20:00         105
## 6  298698016   12/31/2024      8:00:00   12/31/2024      9:00:00         116
##       RPT_DT KY_CD                      OFNS_DESC PD_CD
## 1 12/31/2024   351 CRIMINAL MISCHIEF & RELATED OF   258
## 2 12/31/2024   344   ASSAULT 3 & RELATED OFFENSES   101
## 3 12/31/2024   341                  PETIT LARCENY   357
## 4 12/31/2024   578                  HARRASSMENT 2   638
## 5 12/31/2024   126        MISCELLANEOUS PENAL LAW   198
## 6 12/31/2024   344   ASSAULT 3 & RELATED OFFENSES   101
##                          PD_DESC CRM_ATPT_CPTD_CD  LAW_CAT_CD BORO_NM
## 1 CRIMINAL MISCHIEF 4TH, GRAFFIT        COMPLETED MISDEMEANOR  QUEENS
## 2                      ASSAULT 3        COMPLETED MISDEMEANOR  QUEENS
## 3 LARCENY,PETIT OF VEHICLE ACCES        COMPLETED MISDEMEANOR  QUEENS
## 4          HARASSMENT,SUBD 3,4,5        COMPLETED   VIOLATION  QUEENS
## 5            CRIMINAL CONTEMPT 1        COMPLETED      FELONY  QUEENS
## 6                      ASSAULT 3        COMPLETED MISDEMEANOR  QUEENS
##   LOC_OF_OCCUR_DESC          PREM_TYP_DESC       JURIS_DESC JURISDICTION_CODE
## 1           REAR OF    COMMERCIAL BUILDING N.Y. POLICE DEPT                 0
## 2            INSIDE        RESIDENCE-HOUSE N.Y. POLICE DEPT                 0
## 3            (null)                 STREET N.Y. POLICE DEPT                 0
## 4            INSIDE RESIDENCE - APT. HOUSE N.Y. POLICE DEPT                 0
## 5            INSIDE        RESIDENCE-HOUSE N.Y. POLICE DEPT                 0
## 6            INSIDE        RESIDENCE-HOUSE N.Y. POLICE DEPT                 0
##   PARKS_NM HADEVELOPT HOUSING_PSA X_COORD_CD Y_COORD_CD SUSP_AGE_GROUP
## 1   (null)     (null)          NA    1061097     216033          25-44
## 2   (null)     (null)          NA    1046104     187464            <18
## 3   (null)     (null)          NA    1052283     211781        UNKNOWN
## 4   (null)     (null)          NA    1053361     158350        UNKNOWN
## 5   (null)     (null)          NA    1058127     199040          25-44
## 6   (null)     (null)          NA    1048028     178970          25-44
##        SUSP_RACE SUSP_SEX TRANSIT_DISTRICT Latitude Longitude
## 1          WHITE        M               NA 40.75931 -73.72261
## 2          BLACK        F               NA 40.68101 -73.77699
## 3        UNKNOWN        U               NA 40.74770 -73.75447
## 4          BLACK        F               NA 40.60105 -73.75112
## 5 WHITE HISPANIC        M               NA 40.71270 -73.73351
## 6          BLACK        M               NA 40.65769 -73.77013
##                                   Lat_Lon              PATROL_BORO STATION_NAME
## 1                 (40.759313, -73.722608) PATROL BORO QUEENS NORTH       (null)
## 2                 (40.681014, -73.776991) PATROL BORO QUEENS SOUTH       (null)
## 3 (40.74770474705376, -73.75446623419606) PATROL BORO QUEENS NORTH       (null)
## 4                 (40.601049, -73.751124) PATROL BORO QUEENS SOUTH       (null)
## 5                 (40.712698, -73.733514) PATROL BORO QUEENS SOUTH       (null)
## 6                 (40.657687, -73.770132) PATROL BORO QUEENS SOUTH       (null)
##   VIC_AGE_GROUP       VIC_RACE VIC_SEX
## 1       UNKNOWN        UNKNOWN       D
## 2           <18          BLACK       M
## 3         25-44          BLACK       M
## 4         25-44        UNKNOWN       F
## 5         45-64 WHITE HISPANIC       F
## 6         18-24 WHITE HISPANIC       F

Structure of the dataset:

# Compact overview of every column: its type and the first few values.
str(object = crime_data2024)
## 'data.frame':    577108 obs. of  35 variables:
##  $ CMPLNT_NUM       : chr  "298685380" "298690828" "298762768" "298704506" ...
##  $ CMPLNT_FR_DT     : chr  "12/30/2024" "12/31/2024" "12/30/2024" "12/28/2024" ...
##  $ CMPLNT_FR_TM     : chr  "18:58:00" "13:00:00" "19:00:00" "19:00:00" ...
##  $ CMPLNT_TO_DT     : chr  "12/30/2024" "12/31/2024" "12/31/2024" "12/28/2024" ...
##  $ CMPLNT_TO_TM     : chr  "19:00:00" "13:10:00" "7:00:00" "19:15:00" ...
##  $ ADDR_PCT_CD      : int  111 113 111 101 105 116 106 105 113 111 ...
##  $ RPT_DT           : chr  "12/31/2024" "12/31/2024" "12/31/2024" "12/31/2024" ...
##  $ KY_CD            : int  351 344 341 578 126 344 105 126 109 344 ...
##  $ OFNS_DESC        : chr  "CRIMINAL MISCHIEF & RELATED OF" "ASSAULT 3 & RELATED OFFENSES" "PETIT LARCENY" "HARRASSMENT 2" ...
##  $ PD_CD            : int  258 101 357 638 198 101 396 198 409 101 ...
##  $ PD_DESC          : chr  "CRIMINAL MISCHIEF 4TH, GRAFFIT" "ASSAULT 3" "LARCENY,PETIT OF VEHICLE ACCES" "HARASSMENT,SUBD 3,4,5" ...
##  $ CRM_ATPT_CPTD_CD : chr  "COMPLETED" "COMPLETED" "COMPLETED" "COMPLETED" ...
##  $ LAW_CAT_CD       : chr  "MISDEMEANOR" "MISDEMEANOR" "MISDEMEANOR" "VIOLATION" ...
##  $ BORO_NM          : chr  "QUEENS" "QUEENS" "QUEENS" "QUEENS" ...
##  $ LOC_OF_OCCUR_DESC: chr  "REAR OF" "INSIDE" "(null)" "INSIDE" ...
##  $ PREM_TYP_DESC    : chr  "COMMERCIAL BUILDING" "RESIDENCE-HOUSE" "STREET" "RESIDENCE - APT. HOUSE" ...
##  $ JURIS_DESC       : chr  "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" "N.Y. POLICE DEPT" ...
##  $ JURISDICTION_CODE: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PARKS_NM         : chr  "(null)" "(null)" "(null)" "(null)" ...
##  $ HADEVELOPT       : chr  "(null)" "(null)" "(null)" "(null)" ...
##  $ HOUSING_PSA      : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ X_COORD_CD       : int  1061097 1046104 1052283 1053361 1058127 1048028 1034548 1058127 1047071 1047564 ...
##  $ Y_COORD_CD       : int  216033 187464 211781 158350 199040 178970 185628 199040 189114 217542 ...
##  $ SUSP_AGE_GROUP   : chr  "25-44" "<18" "UNKNOWN" "UNKNOWN" ...
##  $ SUSP_RACE        : chr  "WHITE" "BLACK" "UNKNOWN" "BLACK" ...
##  $ SUSP_SEX         : chr  "M" "F" "U" "F" ...
##  $ TRANSIT_DISTRICT : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Latitude         : num  40.8 40.7 40.7 40.6 40.7 ...
##  $ Longitude        : num  -73.7 -73.8 -73.8 -73.8 -73.7 ...
##  $ Lat_Lon          : chr  "(40.759313, -73.722608)" "(40.681014, -73.776991)" "(40.74770474705376, -73.75446623419606)" "(40.601049, -73.751124)" ...
##  $ PATROL_BORO      : chr  "PATROL BORO QUEENS NORTH" "PATROL BORO QUEENS SOUTH" "PATROL BORO QUEENS NORTH" "PATROL BORO QUEENS SOUTH" ...
##  $ STATION_NAME     : chr  "(null)" "(null)" "(null)" "(null)" ...
##  $ VIC_AGE_GROUP    : chr  "UNKNOWN" "<18" "25-44" "25-44" ...
##  $ VIC_RACE         : chr  "UNKNOWN" "BLACK" "BLACK" "UNKNOWN" ...
##  $ VIC_SEX          : chr  "D" "M" "M" "F" ...

Columns of the dataset:

# List the column names of the complaint table.
names(crime_data2024)
##  [1] "CMPLNT_NUM"        "CMPLNT_FR_DT"      "CMPLNT_FR_TM"     
##  [4] "CMPLNT_TO_DT"      "CMPLNT_TO_TM"      "ADDR_PCT_CD"      
##  [7] "RPT_DT"            "KY_CD"             "OFNS_DESC"        
## [10] "PD_CD"             "PD_DESC"           "CRM_ATPT_CPTD_CD" 
## [13] "LAW_CAT_CD"        "BORO_NM"           "LOC_OF_OCCUR_DESC"
## [16] "PREM_TYP_DESC"     "JURIS_DESC"        "JURISDICTION_CODE"
## [19] "PARKS_NM"          "HADEVELOPT"        "HOUSING_PSA"      
## [22] "X_COORD_CD"        "Y_COORD_CD"        "SUSP_AGE_GROUP"   
## [25] "SUSP_RACE"         "SUSP_SEX"          "TRANSIT_DISTRICT" 
## [28] "Latitude"          "Longitude"         "Lat_Lon"          
## [31] "PATROL_BORO"       "STATION_NAME"      "VIC_AGE_GROUP"    
## [34] "VIC_RACE"          "VIC_SEX"

Clean up date and time and prepare the data:

# Parse the date/time columns and derive the calendar features used by the
# plots below.
#
# Note: Hour comes from the complaint start time (CMPLNT_FR_TM), while Day,
# Month, Week and Season are all derived from the report date (RPT_DT).
crime_data2024 <- crime_data2024 %>%
  mutate(
    # "12/31/2024" -> Date
    RPT_DT = as.Date(RPT_DT, format = "%m/%d/%Y"),
    # "18:58:00" -> hms time of day
    CMPLNT_FR_TM = hms::as_hms(CMPLNT_FR_TM),
    Hour = hour(CMPLNT_FR_TM),
    Day = wday(RPT_DT, label = TRUE),
    Month = month(RPT_DT, label = TRUE),
    # ISO 8601 week number; the last days of December can belong to week 1
    # of the following ISO year.
    Week = isoweek(RPT_DT),
    # Classify by the numeric month rather than the month label: labels are
    # locale-dependent, so matching on "Dec"/"Jan"/... would put every row in
    # the fallback season under a non-English locale. Rows with a missing
    # report date get NA instead of being silently counted as "Fall".
    Season = case_when(
      month(RPT_DT) %in% c(12, 1, 2) ~ "Winter",
      month(RPT_DT) %in% 3:5 ~ "Spring",
      month(RPT_DT) %in% 6:8 ~ "Summer",
      month(RPT_DT) %in% 9:11 ~ "Fall",
      TRUE ~ NA_character_
    )
  )

Index Crimes of the dataset:

# Offense descriptions (OFNS_DESC values) treated as index crimes in this
# analysis.
index_crimes <- c(
  "MURDER & NON-NEGL. MANSLAUGHTER",
  "RAPE",
  "ROBBERY",
  "FELONY ASSAULT",
  "BURGLARY",
  "GRAND LARCENY",
  "GRAND LARCENY OF MOTOR VEHICLE"
)

# Keep only the complaints whose offense is one of the index crimes.
index_crime_data <- filter(crime_data2024, OFNS_DESC %in% index_crimes)

# Number of complaints per index crime, most frequent first.
count(index_crime_data, OFNS_DESC, sort = TRUE)
##                         OFNS_DESC     n
## 1                   GRAND LARCENY 48445
## 2                  FELONY ASSAULT 29452
## 3                         ROBBERY 16574
## 4  GRAND LARCENY OF MOTOR VEHICLE 14193
## 5                        BURGLARY 13067
## 6                            RAPE  1749
## 7 MURDER & NON-NEGL. MANSLAUGHTER   382

Index crimes grouped by borough:

# Total index-crime complaints per borough, largest first. Complaints whose
# borough is recorded as "(null)" form their own group.
index_crime_summary <- crime_data2024 %>%
  filter(OFNS_DESC %in% index_crimes) %>%
  group_by(BORO_NM) %>%
  tally(name = "Total_Index_Crimes") %>%
  arrange(desc(Total_Index_Crimes))

# Show the summary table
print(index_crime_summary)
## # A tibble: 6 × 2
##   BORO_NM       Total_Index_Crimes
##   <chr>                      <int>
## 1 MANHATTAN                  31404
## 2 BROOKLYN                   30751
## 3 BRONX                      30393
## 4 QUEENS                     27849
## 5 STATEN ISLAND               3448
## 6 (null)                        17

Now focus on robbery:

# Robbery complaints that have both coordinates, so every row can be placed
# on a map.
robbery_data <- filter(
  crime_data2024,
  OFNS_DESC == "ROBBERY" & !(is.na(Latitude) | is.na(Longitude))
)

Filter for robbery crimes, group by borough, and draw a bar chart:

# Robbery complaints per borough, largest first. This is counted from the
# full complaint table, so robberies without coordinates are included here
# (unlike robbery_data, which drops them).
robbery_by_borough <- crime_data2024 %>%
  filter(OFNS_DESC == "ROBBERY") %>%
  group_by(BORO_NM) %>%
  summarise(Robbery_Count = n()) %>%
  arrange(desc(Robbery_Count))

# Horizontal bar chart with the count printed at the end of each bar.
ggplot(
  robbery_by_borough,
  aes(x = reorder(BORO_NM, Robbery_Count), y = Robbery_Count)
) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "LightBlue") +
  geom_text(aes(label = Robbery_Count), hjust = -0.1, size = 4) +
  # The labels sit just past the bar ends; widen the upper end of the count
  # axis so the label on the longest bar is not cut off at the panel edge.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() + # Horizontal bars keep the borough names readable
  labs(
    title = "Robbery Counts by Borough",
    x = "Borough",
    y = "Number of Robbery Complaints"
  ) +
  theme_minimal()

Robberies by Hour:

# Bar chart of robbery complaints per hour of day (Hour is taken from the
# complaint start time).
robbery_data %>%
  ggplot(mapping = aes(x = Hour)) +
  geom_bar(fill = "tomato") +
  ggtitle("Robberies by Hour") +
  xlab("Hour of Day") +
  ylab("Count") +
  theme_minimal()

Robberies by day of week:

# Bar chart of robbery complaints per weekday.
robbery_data %>%
  ggplot(mapping = aes(x = Day)) +
  geom_bar(fill = "skyblue") +
  ggtitle("Robberies by Day of Week") +
  xlab("Day") +
  ylab("Count") +
  theme_minimal()

Monthly Robbery Trend:

# Group by month
# Robbery complaints per month
monthly_robbery <- robbery_data %>%
  group_by(Month) %>%
  tally(name = "Count")

# One column per month; the x axis keeps its default label ("Month").
monthly_robbery %>%
  ggplot(mapping = aes(x = Month, y = Count)) +
  geom_col(fill = "steelblue") +
  ggtitle("Monthly Robbery Trends in 2024") +
  ylab("Number of Robberies") +
  theme_minimal()

Seasonal Robbery Trend:

# Bar chart of robbery complaints per season.
robbery_data %>%
  ggplot(mapping = aes(x = Season)) +
  geom_bar(fill = "steelblue") +
  ggtitle("Number of Robbery Crimes by Season") +
  xlab("Season") +
  ylab("Number of Robbery Crimes") +
  theme_minimal()

Spatial Mapping with Leaflet (Hotspot Visualization):

# Interactive map: one semi-transparent red circle per robbery complaint,
# drawn on the default tile layer. Clicking a circle shows the report date.
robbery_data %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lat = ~Latitude,
    lng = ~Longitude,
    color = "red",
    radius = 4,
    fillOpacity = 0.4,
    popup = ~paste("Date:", RPT_DT)
  )

To save the map:

# Build the robbery hotspot map once more as an object so it can be written
# to disk: one semi-transparent red circle per robbery complaint.
map <- leaflet(data = robbery_data) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 4,
    color = "red",
    fillOpacity = 0.4,
    # Keep the report-date popup so the saved file matches the map that is
    # displayed in the document.
    popup = ~paste("Date:", RPT_DT)
  )

# Write the widget to an HTML file in the current working directory.
htmlwidgets::saveWidget(map, "robbery_hotspots_2024.html")