library(tidyverse)
library(sf)
library(RSocrata)
library(leaflet)

#### use scorata api to pull in fatalities
raw_fatalities <- read.socrata("https://data.cityofnewyork.us/resource/h9gi-nx95.json?$where=number_of_persons_killed > 0") |> 
  select(collision_id, everything())


#### remove NA and 0,0
fatalities <- raw_fatalities |> 
  filter(!is.na(latitude)) |> 
  filter(longitude != "0.0000000") |> 
  mutate(lat_3 = round(as.numeric(latitude), 3),
         long_3 = round(as.numeric(longitude), 3))

#### convert to spatial dataframe
fatalities_sf <- st_as_sf(fatalities, coords = c("longitude", "latitude"), crs=4326)

Create map of fatalities since 2012

#### view on map to explore
fatalities_map <- leaflet() |>
  addProviderTiles(provider = "CartoDB.Positron") |>
  addCircleMarkers(data = fatalities_sf,
                   radius = 2)

fatalities_map
#### calculate the number of fatalities by location and the zip code
count_fatalities <- fatalities %>%
  group_by(lat_3, long_3) |> 
  summarise(count = n(),
            #### there are a lot of missing zip codes, can add zip codes using spatial join later
            zip_code = first(zip_code)) |> 
  arrange(desc(count))
## `summarise()` has grouped output by 'lat_3'. You can override using the
## `.groups` argument.
head(count_fatalities)