Introduction

This report analyzes and visualizes data on motor vehicle collisions at the neighborhood level in New York City. The analysis focuses on the neighborhood of Corona in Queens.

library(tidyverse)
library(knitr)
library(RSocrata)
library(sf)
library(scales)
library(RColorBrewer)
library(ggplot2)
library(dplyr)

Methods

Neighborhood boundaries are represented with a shapefile of Neighborhood Tabulation Areas that was downloaded from NYC Planning.

The data for this analysis was originally downloaded directly from the NYC Open Data Portal using the RSocrata package. Due to the size of the file, a CSV version was saved and used for the purposes of this analysis. The steps for downloading the data directly from NYC Open Data and writing it to a CSV file are still displayed below. Data on collisions that was used for this analysis included:

# One-time download of the full collisions table from NYC Open Data. Kept for
# reference only (not run): the file is large, so the analysis reads a cached
# CSV copy instead.
## raw_collisions <- read.socrata("https://data.cityofnewyork.us/resource/h9gi-nx95.csv")
## write.csv(raw_collisions, "data/raw/raw_collisions.csv")

# Directory holding the cached inputs; defined once so the location only has
# to be changed in one place.
raw_data_dir <- "~/Desktop/methods1/part2/data/raw"

# Citywide collision records from the cached CSV.
raw_collisions <- read_csv(file.path(raw_data_dir, "raw_collisions.csv"))

# Neighborhood Tabulation Area (NTA) polygons.
nabes <- st_read(
  file.path(raw_data_dir, "geo", "nynta2020_23c", "nynta2020.shp")
)

# NTA polygons for the four listed neighborhood abbreviations.
geo_corona <- nabes |>
  filter(NTAAbbrev %in% c("NrthCrna", "Elmhrst", "Crna", "FlshMdwCPk"))

# Split crash_date on "-" into year / month / day and keep 2023 only.
# separate() yields character columns, so compare against the string "2023"
# rather than relying on implicit numeric-to-character coercion.
collisions23 <- raw_collisions |>
  separate(crash_date, into = c("crash_year", "month", "day"), sep = "-") |>
  filter(crash_year == "2023")

# 2023 collisions recorded in zip code 11368, with month and day recombined
# into a single month_day column (unite() joins them with its default "_").
# collisions23 is already restricted to 2023, so no year filter is needed here.
raw_corona_collisions <- collisions23 |>
  filter(zip_code == 11368) |>
  unite(month_day, c("month", "day"))

Analysis

# Summary tables of 2023 collision counts. Counts are passed through comma(),
# so the count columns are formatted character strings intended for display.
# .groups = "drop" returns plain ungrouped tibbles; without it the two-key
# summaries stay grouped by zip_code and summarise() emits a regrouping
# message.

# Collisions per borough (records without a zip code are excluded).
borough_collisions_summary <- collisions23 |>
  filter(!is.na(zip_code)) |>
  group_by(borough) |>
  summarise(
    `Total Collisions Jan-Nov 2023` = comma(n()),
    .groups = "drop"
  ) |>
  rename(Borough = "borough")

# Collisions per zip code, labelled with the borough of each zip code.
zip_collisions_summary <- collisions23 |>
  filter(!is.na(zip_code)) |>
  group_by(zip_code, borough) |>
  summarise(
    `Total Collisions Jan-Nov 2023` = comma(n()),
    .groups = "drop"
  ) |>
  rename(
    Borough = "borough",
    "Zip Code" = "zip_code"
  )

# Collisions in the Corona subset (zip code 11368).
corona_collisions_summary <- raw_corona_collisions |>
  group_by(zip_code, borough) |>
  summarise(
    `Total Collisions in Corona Jan-Nov 2023` = comma(n()),
    .groups = "drop"
  )

Processing Spatial Data

# Keep only the fields needed for mapping and drop records that cannot be
# placed: a missing latitude, or a longitude that is not negative.
corona_collisions_points <- raw_corona_collisions |>
  select(month_day, latitude, longitude) |>
  filter(!is.na(latitude), longitude < 0)

# Build point geometries from the longitude/latitude columns (EPSG:4326).
collision_points <- st_as_sf(
  corona_collisions_points,
  coords = c("longitude", "latitude"),
  crs = 4326
)

# Reproject to EPSG:2263 and print the resulting CRS as a check.
collision_points2263 <- collision_points |>
  st_transform(2263)
st_crs(collision_points2263)

# Attach NTA attributes to each collision point (left join, so points that
# intersect no NTA polygon are kept).
collisions_nabes <- st_join(
  collision_points2263,
  nabes,
  join = st_intersects,
  left = TRUE,
  largest = TRUE
)

Results

# Map of the Corona collision points drawn over the four study-area NTA
# polygons.
ggplot() +
  # Neighborhood polygons with their outlines hidden; geo_corona is the same
  # four-NTA subset of nabes that was previously re-filtered inline here.
  geom_sf(
    data = geo_corona,
    color = "transparent",
    lwd = 1
  ) +
  # Collision points. No fill aesthetic is mapped: mapping fill to the constant
  # string "geometry" only produced a meaningless one-entry legend.
  geom_sf(data = collisions_nabes) +
  theme_void()

The map above shows the locations of motor vehicle collisions in the neighborhood of Corona, Queens, in 2023.