# Download the shooting-incident records as JSON from the NYC open-data
# endpoint and parse them into a flat data frame.
endpoint <- "https://data.cityofnewyork.us/resource/833y-fsy8.json"
resp <- httr::GET(
  endpoint,
  query = list("$limit" = 30000, "$order" = "occur_date DESC")
)
# Stop on a 4xx/5xx reply instead of handing an error body to the JSON parser,
# which would otherwise yield a confusing downstream failure.
httr::stop_for_status(resp)
shooting_data <- jsonlite::fromJSON(
  # State the encoding explicitly so httr does not have to guess it.
  httr::content(resp, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)
# Report how many rows have no perpetrator age group, then discard those rows.
sum(is.na(shooting_data[["perp_age_group"]]))
## [1] 9344
# NOTE(review): only true NA values are dropped here. The perp_sex table further
# down still lists literal "(null)" strings, so placeholder text of that kind is
# not treated as missing -- confirm whether perp_age_group carries it as well.
shooting_data <- filter(shooting_data, !is.na(perp_age_group))
# Convert occur_time to an hms value, extract its hour, and bucket each
# incident into a coarse time-of-day label:
#   03:00-11:59 -> "Morning", 12:00-16:59 -> "Afternoon", otherwise "Night".
shooting_data <- shooting_data %>%
  mutate(
    occur_time = hms::as_hms(occur_time),
    hour = lubridate::hour(occur_time),
    time_of_day = case_when(
      # case_when() treats an NA condition as "no match", so without this guard
      # a row with a missing hour would fall through to the catch-all below and
      # be silently counted as "Night".
      is.na(hour) ~ NA_character_,
      hour >= 3 & hour < 12 ~ "Morning",
      hour >= 12 & hour < 17 ~ "Afternoon",
      TRUE ~ "Night"
    )
  )
# Share of rows (as a percentage) whose recorded perpetrator sex is "F".
# NOTE(review): na.rm only drops true NA values; rows coded "U" or "(null)"
# (both appear in the borough table below) remain in the denominator --
# confirm that this is the intended base for the percentage.
summarise(
  shooting_data,
  perc_female = 100 * mean(perp_sex == "F", na.rm = TRUE)
)
## perc_female
## 1 2.259804
# Bar chart of the number of incidents in each time-of-day bucket.
ggplot(shooting_data, aes(time_of_day)) +
  geom_bar(fill = "purple") +
  ggtitle("Shootings by Time of Day") +
  xlab("Time of Day") +
  ylab("# of Shootings") +
  theme_minimal(base_size = 16) +
  # Override only the title: italic, slightly smaller than the base size.
  theme(plot.title = element_text(face = "italic", size = 15))
# (B) personal insight
# Bar chart of incident counts per recorded perpetrator sex, one panel per
# borough.
ggplot(shooting_data, aes(perp_sex)) +
  geom_bar(fill = "red") +
  facet_wrap(vars(boro)) +
  ggtitle("Shootings by Gender, Split by Borough") +
  xlab("Gender of Shooter") +
  ylab("# of Shootings") +
  theme_minimal(base_size = 15) +
  # Bold title at the same size as the base text.
  theme(plot.title = element_text(face = "bold", size = 15))
# Tabulate incident counts for every borough / perpetrator-sex combination and
# render the result as a captioned table.
knitr::kable(
  count(shooting_data, boro, perp_sex),
  caption = "Shootings by Gender and Borough"
)
boro | perp_sex | n |
---|---|---|
BRONX | (null) | 518 |
BRONX | F | 134 |
BRONX | M | 5279 |
BRONX | U | 391 |
BROOKLYN | (null) | 635 |
BROOKLYN | F | 146 |
BROOKLYN | M | 5971 |
BROOKLYN | U | 642 |
MANHATTAN | (null) | 191 |
MANHATTAN | F | 87 |
MANHATTAN | M | 2484 |
MANHATTAN | U | 185 |
QUEENS | (null) | 257 |
QUEENS | F | 79 |
QUEENS | M | 2502 |
QUEENS | U | 222 |
STATEN ISLAND | (null) | 27 |
STATEN ISLAND | F | 15 |
STATEN ISLAND | M | 609 |
STATEN ISLAND | U | 26 |
NYC Shooting Incident Data (NYPD)