library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
# Load the NYPD hate-crime complaint file. The file is addressed by its full
# path instead of calling setwd(), so reading it does not change the
# session's working directory as a side effect.
hatecrimes <- read_csv(
  file.path(
    "C:/Users/SwagD/Downloads/Data 110",
    "NYPD_Hate_Crimes_20260222.csv"
  )
)
## Rows: 4029 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Record Create Date, Patrol Borough Name, County, Law Code Category ...
## dbl (4): Full Complaint ID, Complaint Year Number, Month Number, Complaint P...
## lgl (1): Arrest Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Normalise the column names in one pass: lower-case them and strip every
# space, so e.g. "Patrol Borough Name" becomes "patrolboroughname".
names(hatecrimes) <- gsub(" ", "", tolower(names(hatecrimes)))
# Preview the first rows under the cleaned names.
head(hatecrimes)
## # A tibble: 6 × 14
## fullcomplaintid complaintyearnumber monthnumber recordcreatedate
## <dbl> <dbl> <dbl> <chr>
## 1 2.02e14 2019 1 01/23/2019
## 2 2.02e14 2019 2 02/25/2019
## 3 2.02e14 2019 2 02/27/2019
## 4 2.02e14 2019 4 04/16/2019
## 5 2.02e14 2019 6 06/20/2019
## 6 2.02e14 2019 7 07/31/2019
## # ℹ 10 more variables: complaintprecinctcode <dbl>, patrolboroughname <chr>,
## # county <chr>, lawcodecategorydescription <chr>, offensedescription <chr>,
## # pdcodedescription <chr>, biasmotivedescription <chr>,
## # offensecategory <chr>, arrestdate <lgl>, arrestid <chr>
# Tally complaints per bias motive, most frequent first. count() does the
# grouping internally and returns an ungrouped tibble, so the separate
# select()/group_by() steps are unnecessary and no grouping leaks into the
# plotting code that reuses bias_count.
bias_count <- hatecrimes |>
  count(biasmotivedescription, sort = TRUE)
head(bias_count)
## # A tibble: 6 × 2
## biasmotivedescription n
## <chr> <int>
## 1 ANTI-JEWISH 1906
## 2 ANTI-MALE HOMOSEXUAL (GAY) 489
## 3 ANTI-ASIAN 401
## 4 ANTI-BLACK 315
## 5 ANTI-OTHER ETHNICITY 168
## 6 ANTI-MUSLIM 156
# Raw bar chart straight from the complaint rows: one bar per bias motive,
# bar height is the number of rows with that motive.
hatecrimes |>
  ggplot(aes(x = biasmotivedescription)) +
  geom_bar()
# Same chart from the pre-computed counts, limited to the first ten rows of
# bias_count (it is sorted by descending n, so these are the ten largest).
ggplot(head(bias_count, 10), aes(x = biasmotivedescription, y = n)) +
  geom_col()
# Order the bars by their count and flip the axes so the motive labels sit
# on the vertical axis.
ggplot(
  head(bias_count, 10),
  aes(x = reorder(biasmotivedescription, n), y = n)
) +
  geom_col() +
  coord_flip()
# Ordered, flipped top-ten chart with title, subtitle, caption and axis
# labels. The motive axis title is blanked because the bar labels are
# self-explanatory.
ggplot(
  head(bias_count, 10),
  aes(x = reorder(biasmotivedescription, n), y = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crimes from 2019-2026",
    subtitle = "Counts based on the hatecrime motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  )
# Labelled top-ten chart, now with salmon bars and the minimal theme.
ggplot(
  head(bias_count, 10),
  aes(x = reorder(biasmotivedescription, n), y = n)
) +
  geom_col(fill = "salmon") +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crimes from 2019-2026",
    subtitle = "Counts based on the hatecrime motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  ) +
  theme_minimal()
# Final version of the top-ten chart: each bar carries its count as a text
# label, so the tick labels on the count axis are removed.
ggplot(
  head(bias_count, 10),
  aes(x = reorder(biasmotivedescription, n), y = n)
) +
  geom_col(fill = "salmon") +
  # A slightly negative hjust nudges the label just past the end of the bar.
  geom_text(aes(label = n), hjust = -.05, size = 3) +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crimes from 2019-2026",
    subtitle = "Counts based on the hatecrime motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise restore the text.
  theme(axis.text.x = element_blank())
# Yearly counts for the four most frequent bias motives, largest first.
# count() with both columns returns an ungrouped tibble already sorted by n,
# so no group_by() is needed and no grouping is left attached to the result.
hate_year <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  count(complaintyearnumber, biasmotivedescription, sort = TRUE)
hate_year
## # A tibble: 28 × 3
## complaintyearnumber biasmotivedescription n
## <dbl> <chr> <int>
## 1 2024 ANTI-JEWISH 371
## 2 2023 ANTI-JEWISH 343
## 3 2025 ANTI-JEWISH 320
## 4 2022 ANTI-JEWISH 279
## 5 2019 ANTI-JEWISH 252
## 6 2021 ANTI-JEWISH 215
## 7 2021 ANTI-ASIAN 150
## 8 2020 ANTI-JEWISH 126
## 9 2023 ANTI-MALE HOMOSEXUAL (GAY) 116
## 10 2022 ANTI-ASIAN 91
## # ℹ 18 more rows
# Per-county counts for the four most frequent bias motives, largest first.
# count() with both columns returns an ungrouped tibble already sorted by n,
# so no group_by() is needed and no grouping is left attached to the result.
hate_county <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  count(county, biasmotivedescription, sort = TRUE)
hate_county
## # A tibble: 20 × 3
## county biasmotivedescription n
## <chr> <chr> <int>
## 1 KINGS ANTI-JEWISH 798
## 2 NEW YORK ANTI-JEWISH 651
## 3 QUEENS ANTI-JEWISH 289
## 4 NEW YORK ANTI-MALE HOMOSEXUAL (GAY) 237
## 5 NEW YORK ANTI-ASIAN 228
## 6 KINGS ANTI-MALE HOMOSEXUAL (GAY) 120
## 7 KINGS ANTI-BLACK 99
## 8 BRONX ANTI-JEWISH 92
## 9 QUEENS ANTI-MALE HOMOSEXUAL (GAY) 91
## 10 KINGS ANTI-ASIAN 80
## 11 NEW YORK ANTI-BLACK 79
## 12 QUEENS ANTI-ASIAN 78
## 13 RICHMOND ANTI-JEWISH 76
## 14 QUEENS ANTI-BLACK 75
## 15 BRONX ANTI-MALE HOMOSEXUAL (GAY) 35
## 16 RICHMOND ANTI-BLACK 35
## 17 BRONX ANTI-BLACK 27
## 18 BRONX ANTI-ASIAN 10
## 19 RICHMOND ANTI-MALE HOMOSEXUAL (GAY) 6
## 20 RICHMOND ANTI-ASIAN 5
# Counts for the four most frequent bias motives broken down by both year
# and county, largest first. The result stays grouped by
# complaintyearnumber and county, as the printed header below shows.
hate2 <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  group_by(complaintyearnumber, county) |>
  count(biasmotivedescription, sort = TRUE)
hate2
## # A tibble: 127 × 4
## # Groups: complaintyearnumber, county [35]
## complaintyearnumber county biasmotivedescription n
## <dbl> <chr> <chr> <int>
## 1 2024 KINGS ANTI-JEWISH 152
## 2 2024 NEW YORK ANTI-JEWISH 136
## 3 2025 KINGS ANTI-JEWISH 136
## 4 2019 KINGS ANTI-JEWISH 128
## 5 2023 KINGS ANTI-JEWISH 126
## 6 2022 KINGS ANTI-JEWISH 125
## 7 2023 NEW YORK ANTI-JEWISH 124
## 8 2025 NEW YORK ANTI-JEWISH 110
## 9 2022 NEW YORK ANTI-JEWISH 104
## 10 2021 NEW YORK ANTI-ASIAN 84
## # ℹ 117 more rows
# Dodged bars of incidents per year, one bar per bias motive.
# hate_year has exactly one row per year/motive, so each bar is the total
# for that year. Plotting hate2 here (which is also split by county) draws
# several bars for the same year and motive on top of each other, so only
# the tallest county would be visible.
# The title years follow the data: complaint years run from 2019 to 2025.
# NOTE(review): the caption cites NY State DCJS, but the input file is named
# NYPD_Hate_Crimes_... - confirm the source.
ggplot(
  hate_year,
  aes(x = complaintyearnumber, y = n, fill = biasmotivedescription)
) +
  geom_col(position = "dodge") +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2019-2025",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# Dodged bars of incidents per county, one bar per bias motive.
# hate_county has exactly one row per county/motive, so each bar is the
# total for that county. Plotting hate2 here (which is also split by year)
# draws one bar per year for the same county and motive on top of each
# other, so only the tallest year would be visible.
# The title years follow the data: complaint years run from 2019 to 2025.
# NOTE(review): the caption cites NY State DCJS, but the input file is named
# NYPD_Hate_Crimes_... - confirm the source.
ggplot(
  hate_county,
  aes(x = county, y = n, fill = biasmotivedescription)
) +
  geom_col(position = "dodge") +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2019-2025",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# Incidents per year and bias motive, one panel per county. hate2 has one
# row per year/county/motive, so within each panel every bar is a single
# count.
# The title years follow the data: complaint years run from 2019 to 2025
# (the previous "2010-2016" did not match this data set).
# NOTE(review): the caption cites NY State DCJS, but the input file is named
# NYPD_Hate_Crimes_... - confirm the source.
ggplot(
  hate2,
  aes(x = complaintyearnumber, y = n, fill = biasmotivedescription)
) +
  geom_col(position = "dodge") +
  facet_wrap(~county) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2019-2025",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# Load the 2020 census population table. The file is addressed by its full
# path instead of calling setwd(), so reading it does not change the
# session's working directory as a side effect.
nypop <- read_csv(
  file.path(
    "C:/Users/SwagD/Downloads/Data 110",
    "nyc_census_pop_2020.csv"
  )
)
## Rows: 62 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Area Name, Population Percent Change
## num (2): 2020 Census Population, Population Change
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove the text " County" from the area names so they are bare county
# names.
nypop[["Area Name"]] <- gsub(" County", "", nypop[["Area Name"]], fixed = TRUE)
# Keep only the county name (renamed to `county`) and the 2020 population.
nypop2 <- select(nypop, county = `Area Name`, `2020 Census Population`)
head(nypop2)
## # A tibble: 6 × 2
## county `2020 Census Population`
## <chr> <dbl>
## 1 Albany 314848
## 2 Allegany 46456
## 3 Bronx 1472654
## 4 Broome 198683
## 5 Cattaraugus 77042
## 6 Cayuga 76248
# First join attempt on `county`. The hate-crime counties are upper case
# (e.g. "KINGS") while the census names are title case (e.g. "Bronx"), so
# the keys do not match and the population column comes back NA.
datajoin <- hate2 |>
  left_join(nypop2, by = "county")
datajoin
## # A tibble: 127 × 5
## # Groups: complaintyearnumber, county [35]
## complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
## <dbl> <chr> <chr> <int> <dbl>
## 1 2024 KINGS ANTI-JEWISH 152 NA
## 2 2024 NEW Y… ANTI-JEWISH 136 NA
## 3 2025 KINGS ANTI-JEWISH 136 NA
## 4 2019 KINGS ANTI-JEWISH 128 NA
## 5 2023 KINGS ANTI-JEWISH 126 NA
## 6 2022 KINGS ANTI-JEWISH 125 NA
## 7 2023 NEW Y… ANTI-JEWISH 124 NA
## 8 2025 NEW Y… ANTI-JEWISH 110 NA
## 9 2022 NEW Y… ANTI-JEWISH 104 NA
## 10 2021 NEW Y… ANTI-ASIAN 84 NA
## # ℹ 117 more rows
## # ℹ abbreviated name: ¹`2020 Census Population`
# Bring the join key to a common form on both sides: lower-case county
# names stored as a factor.
hate_new <- mutate(
  hate2,
  county = county |> as.character() |> str_to_lower() |> as_factor()
)
nypop_new <- mutate(
  nypop2,
  county = county |> as.character() |> str_to_lower() |> as_factor()
)
# Repeat the join now that the county names line up.
datajoin <- hate_new |>
  left_join(nypop_new, by = "county")
datajoin
## # A tibble: 127 × 5
## # Groups: complaintyearnumber, county [35]
## complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
## <dbl> <fct> <chr> <int> <dbl>
## 1 2024 kings ANTI-JEWISH 152 2736074
## 2 2024 new y… ANTI-JEWISH 136 1694251
## 3 2025 kings ANTI-JEWISH 136 2736074
## 4 2019 kings ANTI-JEWISH 128 2736074
## 5 2023 kings ANTI-JEWISH 126 2736074
## 6 2022 kings ANTI-JEWISH 125 2736074
## 7 2023 new y… ANTI-JEWISH 124 1694251
## 8 2025 new y… ANTI-JEWISH 110 1694251
## 9 2022 new y… ANTI-JEWISH 104 1694251
## 10 2021 new y… ANTI-ASIAN 84 1694251
## # ℹ 117 more rows
## # ℹ abbreviated name: ¹`2020 Census Population`
# Incidents per 100,000 residents, using the 2020 census population of the
# county, sorted from the highest rate down.
datajoinrate <- datajoin |>
  mutate(rate = (n / `2020 Census Population`) * 1e5) |>
  arrange(desc(rate))
datajoinrate
## # A tibble: 127 × 6
## # Groups: complaintyearnumber, county [35]
## complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
## <dbl> <fct> <chr> <int> <dbl>
## 1 2024 new y… ANTI-JEWISH 136 1694251
## 2 2023 new y… ANTI-JEWISH 124 1694251
## 3 2025 new y… ANTI-JEWISH 110 1694251
## 4 2022 new y… ANTI-JEWISH 104 1694251
## 5 2024 kings ANTI-JEWISH 152 2736074
## 6 2025 kings ANTI-JEWISH 136 2736074
## 7 2021 new y… ANTI-ASIAN 84 1694251
## 8 2021 new y… ANTI-JEWISH 84 1694251
## 9 2019 kings ANTI-JEWISH 128 2736074
## 10 2023 kings ANTI-JEWISH 126 2736074
## # ℹ 117 more rows
## # ℹ abbreviated name: ¹`2020 Census Population`
## # ℹ 1 more variable: rate <dbl>
The hate crimes data set offers several advantages for research. For example, it covers multiple years and all five New York City counties, which lets researchers examine trends over time as well as geographic variation in hate crimes. It also presents the data in depth, with counts and details about offense types and bias motivations that allow specific hate crime patterns to be examined. Because the data are publicly available, they also support reproducible research. The data set is not without drawbacks, though. It suffers from serious underreporting, in part because of inconsistent reporting by some law enforcement agencies. This can obscure the actual rate and nature of hate crimes and thereby reduce accuracy. Furthermore, some of the variables may not be recorded consistently across years, which makes them unreliable for studies that span time. Two further research avenues I could explore with this data set are to track trends in each type of bias across counties over time, and to test for a correlation between the level of hate crime and the level of diversity or income in each county.