library(tidyverse)
library(knitr)
# Load the NYPD hate-crimes extract.
# The file is read through its full path rather than via setwd(): setwd()
# mutates global session state, and when the document is knitted the working
# directory is restored after each chunk, so a setwd() here would not carry
# over to later chunks anyway.
hatecrimes <- read_csv(
  file.path("C:/Users/wesle/Downloads/Data 110", "NYPD_Hate_Crimes_19-26.csv")
)
# HW Assignment 3 (Samimi)
Load Libraries & Data
Clean up Data
# Normalise the column names in one pass: lower-case them, then strip every
# space so they can be typed without backticks.
names(hatecrimes) <- gsub(" ", "", tolower(names(hatecrimes)), fixed = TRUE)

# Peek at the first rows to confirm the new names.
head(hatecrimes)
# A tibble: 6 × 14
fullcomplaintid complaintyearnumber monthnumber recordcreatedate
<dbl> <dbl> <dbl> <chr>
1 2.02e14 2019 1 1/23/2019
2 2.02e14 2019 2 2/25/2019
3 2.02e14 2019 2 2/27/2019
4 2.02e14 2019 4 4/16/2019
5 2.02e14 2019 6 6/20/2019
6 2.02e14 2019 7 7/31/2019
# ℹ 10 more variables: complaintprecinctcode <dbl>, patrolboroughname <chr>,
# county <chr>, lawcodecategorydescription <chr>, offensedescription <chr>,
# pdcodedescription <chr>, biasmotivedescription <chr>,
# offensecategory <chr>, arrestdate <lgl>, arrestid <chr>
Explore the Bias
# Tally the incidents per bias motive and list the most frequent motives first.
# The result stays grouped by biasmotivedescription, one row per motive.
bias_count <- hatecrimes |>
  group_by(biasmotivedescription) |>
  summarise(n = n(), .groups = "keep") |>
  arrange(desc(n))

head(bias_count)
# A tibble: 6 × 2
# Groups: biasmotivedescription [6]
biasmotivedescription n
<chr> <int>
1 ANTI-JEWISH 1906
2 ANTI-MALE HOMOSEXUAL (GAY) 489
3 ANTI-ASIAN 401
4 ANTI-BLACK 315
5 ANTI-OTHER ETHNICITY 168
6 ANTI-MUSLIM 156
Visualize Counts
# First look: one bar per bias motive, bar height = number of rows
# (geom_bar counts the rows itself).
hatecrimes |>
  ggplot() +
  geom_bar(mapping = aes(x = biasmotivedescription))
# Use Inclusion/Exclusion to filter
# bias_count is sorted by descending n, so its first ten rows are the ten
# most frequent motives; plot their precomputed counts.
head(bias_count, 10) |>
  ggplot(mapping = aes(x = biasmotivedescription, y = n)) +
  geom_bar(stat = "identity")
# Rearrange Bar Graph
# Same top-ten chart, with the motives ordered by count and the axes flipped
# so the long motive names read horizontally.
head(bias_count, 10) |>
  ggplot(mapping = aes(x = reorder(biasmotivedescription, n), y = n)) +
  geom_bar(stat = "identity") +
  coord_flip()
# Add Title, Caption, Data Source, Label
# Sorted, flipped top-ten chart with a title, subtitle, caption and axis
# labels. The x label is blanked because the motive names label themselves.
head(bias_count, 10) |>
  ggplot(mapping = aes(x = reorder(biasmotivedescription, n), y = n)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crime from 2019-2026",
    subtitle = "Count Based on the Hatecrime Motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  )
# Add Color & Change Theme
# Labelled top-ten chart, now with salmon bars and the minimal theme.
head(bias_count, 10) |>
  ggplot(mapping = aes(x = reorder(biasmotivedescription, n), y = n)) +
  geom_bar(stat = "identity", fill = "salmon") +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crimes from 2019-2026",
    subtitle = "Counts based on the hatecrime motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  ) +
  theme_minimal()
# Add Annotations & Remove X-Axis Values
# Final top-ten chart: each bar is annotated with its count, so the tick
# labels on the count axis are removed as redundant.
head(bias_count, 10) |>
  ggplot(mapping = aes(x = reorder(biasmotivedescription, n), y = n)) +
  geom_bar(stat = "identity", fill = "salmon") +
  # Negative hjust nudges each label just past the end of its bar.
  geom_text(mapping = aes(label = n), hjust = -0.05, size = 3) +
  coord_flip() +
  labs(
    title = "Bar Graph of Hate Crimes from 2019-2026",
    subtitle = "Counts based on the hatecrime motive",
    caption = "Source: NY State Division of Criminal Justice Services",
    x = "",
    y = "Counts of hatecrime types based on motive"
  ) +
  # theme_minimal() must come before theme() so it does not reset the tweak.
  theme_minimal() +
  theme(axis.text.x = element_blank())
# Look Deeper into Specific Crimes
# Yearly counts for the four most frequent bias motives, largest first.
# The result is grouped by complaintyearnumber.
hate_year <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  count(complaintyearnumber, biasmotivedescription) |>
  group_by(complaintyearnumber) |>
  arrange(desc(n))

hate_year
# A tibble: 28 × 3
# Groups: complaintyearnumber [7]
complaintyearnumber biasmotivedescription n
<dbl> <chr> <int>
1 2024 ANTI-JEWISH 371
2 2023 ANTI-JEWISH 343
3 2025 ANTI-JEWISH 320
4 2022 ANTI-JEWISH 279
5 2019 ANTI-JEWISH 252
6 2021 ANTI-JEWISH 215
7 2021 ANTI-ASIAN 150
8 2020 ANTI-JEWISH 126
9 2023 ANTI-MALE HOMOSEXUAL (GAY) 116
10 2022 ANTI-ASIAN 91
# ℹ 18 more rows
# County-level counts for the four most frequent bias motives, largest first.
# The result is grouped by county.
hate_county <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  count(county, biasmotivedescription) |>
  group_by(county) |>
  arrange(desc(n))

hate_county
# A tibble: 20 × 3
# Groups: county [5]
county biasmotivedescription n
<chr> <chr> <int>
1 KINGS ANTI-JEWISH 798
2 NEW YORK ANTI-JEWISH 651
3 QUEENS ANTI-JEWISH 289
4 NEW YORK ANTI-MALE HOMOSEXUAL (GAY) 237
5 NEW YORK ANTI-ASIAN 228
6 KINGS ANTI-MALE HOMOSEXUAL (GAY) 120
7 KINGS ANTI-BLACK 99
8 BRONX ANTI-JEWISH 92
9 QUEENS ANTI-MALE HOMOSEXUAL (GAY) 91
10 KINGS ANTI-ASIAN 80
11 NEW YORK ANTI-BLACK 79
12 QUEENS ANTI-ASIAN 78
13 RICHMOND ANTI-JEWISH 76
14 QUEENS ANTI-BLACK 75
15 BRONX ANTI-MALE HOMOSEXUAL (GAY) 35
16 RICHMOND ANTI-BLACK 35
17 BRONX ANTI-BLACK 27
18 BRONX ANTI-ASIAN 10
19 RICHMOND ANTI-MALE HOMOSEXUAL (GAY) 6
20 RICHMOND ANTI-ASIAN 5
Check Info Combining Totals from Counties and Years
# Counts for the four most frequent bias motives broken down by both year and
# county, largest first. The result is grouped by complaintyearnumber and
# county, with one row per year x county x motive combination that occurs.
hate2 <- hatecrimes |>
  filter(
    biasmotivedescription %in% c(
      "ANTI-JEWISH",
      "ANTI-MALE HOMOSEXUAL (GAY)",
      "ANTI-ASIAN",
      "ANTI-BLACK"
    )
  ) |>
  count(complaintyearnumber, county, biasmotivedescription) |>
  group_by(complaintyearnumber, county) |>
  arrange(desc(n))

hate2
# A tibble: 127 × 4
# Groups: complaintyearnumber, county [35]
complaintyearnumber county biasmotivedescription n
<dbl> <chr> <chr> <int>
1 2024 KINGS ANTI-JEWISH 152
2 2024 NEW YORK ANTI-JEWISH 136
3 2025 KINGS ANTI-JEWISH 136
4 2019 KINGS ANTI-JEWISH 128
5 2023 KINGS ANTI-JEWISH 126
6 2022 KINGS ANTI-JEWISH 125
7 2023 NEW YORK ANTI-JEWISH 124
8 2025 NEW YORK ANTI-JEWISH 110
9 2022 NEW YORK ANTI-JEWISH 104
10 2021 NEW YORK ANTI-ASIAN 84
# ℹ 117 more rows
Plot the 4 Hate Crimes Together
# Yearly totals of the selected hate crime types, bars dodged by motive.
# hate2 holds one row per year x county x motive. Dodging only separates bars
# by fill, so plotting hate2 directly would draw the county rows of a given
# year and motive on top of one another, showing the largest county rather
# than the total. Sum across counties first so each bar is the yearly total.
hate2 |>
  group_by(complaintyearnumber, biasmotivedescription) |>
  summarise(n = sum(n), .groups = "drop") |>
  ggplot() +
  geom_bar(
    aes(x = complaintyearnumber, y = n, fill = biasmotivedescription),
    position = "dodge", stat = "identity"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    # Year range corrected from "2010-2016" to match the 2019-2026 data file
    # and the titles of the earlier bar graphs.
    title = "Hate Crime Type in NY Counties Between 2019-2026",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# Counties
# County totals of the selected hate crime types, bars dodged by motive.
# hate2 holds one row per year x county x motive. Without aggregation the
# yearly rows of a given county and motive are drawn on top of one another and
# only the tallest year is visible. Sum across years first so each bar is the
# county's total over the whole period.
hate2 |>
  group_by(county, biasmotivedescription) |>
  summarise(n = sum(n), .groups = "drop") |>
  ggplot() +
  geom_bar(
    aes(x = county, y = n, fill = biasmotivedescription),
    position = "dodge", stat = "identity"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    # Year range corrected from "2010-2016" to match the 2019-2026 data file
    # and the titles of the earlier bar graphs.
    title = "Hate Crime Type in NY Counties Between 2019-2026",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# Put it all Together with Years and Counties (Facet)
# Yearly counts of the selected hate crime types, one panel per county.
# With year on the x axis, motive as fill and county as the facet, every row
# of hate2 gets its own bar, so no aggregation is needed here.
ggplot(data = hate2) +
  geom_bar(
    aes(x = complaintyearnumber, y = n, fill = biasmotivedescription),
    position = "dodge", stat = "identity"
  ) +
  facet_wrap(~county) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    # Year range corrected from "2010-2016" to match the 2019-2026 data file
    # and the titles of the earlier bar graphs.
    title = "Hate Crime Type in NY Counties Between 2019-2026",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
# NYC Census Data
# Load the 2020 census population table.
# The file is read through its full path rather than via a repeated setwd():
# setwd() mutates global session state, and when the document is knitted the
# working directory is restored after each chunk.
nypop <- read_csv(
  file.path("C:/Users/wesle/Downloads/Data 110", "nyc_census_pop_2020.csv")
)
# Clean to match the other Dataset
# Drop the " County" suffix so the area names hold just the county name.
nypop <- nypop |>
  mutate(`Area Name` = gsub(" County", "", `Area Name`))

# Keep only the county name (renamed to match the hate-crime data's join key)
# and the 2020 population.
nypop2 <- nypop |>
  select(county = `Area Name`, `2020 Census Population`)

head(nypop2)
# A tibble: 6 × 2
county `2020 Census Population`
<chr> <dbl>
1 Albany 314848
2 Allegany 46456
3 Bronx 1472654
4 Broome 198683
5 Cattaraugus 77042
6 Cayuga 76248
Join the Datasets
# First attempt at attaching the population to each hate-crime row.
# The county names are upper case in hate2 (e.g. KINGS) but title case in
# nypop2 (e.g. Bronx), so no key matches and every population comes back NA.
datajoin <- hate2 |>
  left_join(nypop2, by = "county")

datajoin
# A tibble: 127 × 5
# Groups: complaintyearnumber, county [35]
complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
<dbl> <chr> <chr> <int> <dbl>
1 2024 KINGS ANTI-JEWISH 152 NA
2 2024 NEW Y… ANTI-JEWISH 136 NA
3 2025 KINGS ANTI-JEWISH 136 NA
4 2019 KINGS ANTI-JEWISH 128 NA
5 2023 KINGS ANTI-JEWISH 126 NA
6 2022 KINGS ANTI-JEWISH 125 NA
7 2023 NEW Y… ANTI-JEWISH 124 NA
8 2025 NEW Y… ANTI-JEWISH 110 NA
9 2022 NEW Y… ANTI-JEWISH 104 NA
10 2021 NEW Y… ANTI-ASIAN 84 NA
# ℹ 117 more rows
# ℹ abbreviated name: ¹`2020 Census Population`
Fix
# Put the join key into the same form in both tables: lower-case county names
# stored as factors, so that e.g. "KINGS" and "Kings" both become "kings".
hate_new <- hate2 |>
  mutate(
    county = county |>
      as.character() |>
      str_to_lower() |>
      as_factor()
  )

nypop_new <- nypop2 |>
  mutate(
    county = county |>
      as.character() |>
      str_to_lower() |>
      as_factor()
  )
# Join Again after the Fix
# Repeat the join on the harmonised county key; the populations now match.
datajoin <- hate_new |>
  left_join(nypop_new, by = "county")

datajoin
# A tibble: 127 × 5
# Groups: complaintyearnumber, county [35]
complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
<dbl> <fct> <chr> <int> <dbl>
1 2024 kings ANTI-JEWISH 152 2736074
2 2024 new y… ANTI-JEWISH 136 1694251
3 2025 kings ANTI-JEWISH 136 2736074
4 2019 kings ANTI-JEWISH 128 2736074
5 2023 kings ANTI-JEWISH 126 2736074
6 2022 kings ANTI-JEWISH 125 2736074
7 2023 new y… ANTI-JEWISH 124 1694251
8 2025 new y… ANTI-JEWISH 110 1694251
9 2022 new y… ANTI-JEWISH 104 1694251
10 2021 new y… ANTI-ASIAN 84 1694251
# ℹ 117 more rows
# ℹ abbreviated name: ¹`2020 Census Population`
Calculate the Rate of Incidents per 100,000 & Arrange in Descending Order
# Incidents per 100,000 residents, using the 2020 census population of the
# county as the denominator for every year.
datajoinrate <- mutate(
  datajoin,
  rate = n / `2020 Census Population` * 1e5
)

# Highest rates first.
datajoinrate <- arrange(datajoinrate, desc(rate))

datajoinrate
# A tibble: 127 × 6
# Groups: complaintyearnumber, county [35]
complaintyearnumber county biasmotivedescription n 2020 Census Populati…¹
<dbl> <fct> <chr> <int> <dbl>
1 2024 new y… ANTI-JEWISH 136 1694251
2 2023 new y… ANTI-JEWISH 124 1694251
3 2025 new y… ANTI-JEWISH 110 1694251
4 2022 new y… ANTI-JEWISH 104 1694251
5 2024 kings ANTI-JEWISH 152 2736074
6 2025 kings ANTI-JEWISH 136 2736074
7 2021 new y… ANTI-ASIAN 84 1694251
8 2021 new y… ANTI-JEWISH 84 1694251
9 2019 kings ANTI-JEWISH 128 2736074
10 2023 kings ANTI-JEWISH 126 2736074
# ℹ 117 more rows
# ℹ abbreviated name: ¹`2020 Census Population`
# ℹ 1 more variable: rate <dbl>
Essay
A negative aspect of the hate crimes data set is its lack of information on a national scale: while the information provided is helpful for the areas it covers, it may not be applicable at a larger scale across the country. This becomes more apparent when you consider that New York has one of the highest Jewish populations in the nation, which could possibly explain why so many of the hate crimes are anti-Jewish. A positive aspect of this data set is that it looks at some highly populated areas, New York for example; while it may not be applicable at the national level, it can help in a few highly populated areas. There are two different paths you could take with this data set. One is to look not so much at the group being targeted by the hate crimes but at the overall number of hate crimes, to gauge which places have the highest counts. Another is to look at the level of those hate crimes (felony, misdemeanor, etc.) and see, for example, where the most hate crimes categorized as felonies occur.