# Aaron HW for Data 110

library(tidyverse)

#tinytex::install_tinytex()

#library(tinytex)

# ---- Load and tidy the hate-crime data ----
# NOTE(review): setwd() ties the script to a single machine. It is kept so
# that the relative file names used in this script resolve exactly as before.
setwd("C:/Users/Truly/OneDrive/Documents/Data work")

hatecrimes <- read_csv("hateCrimes2010.csv")

# Normalise the column names: lower-case them, then remove every space so
# they match the space-free spellings used later (e.g. `anti-gaymale`).
names(hatecrimes) <- tolower(names(hatecrimes))
names(hatecrimes) <- gsub(" ", "", names(hatecrimes), fixed = TRUE)

# Quick look at the raw table.
head(hatecrimes)

summary(hatecrimes)

# ---- Keep county, year and ten victim categories ----
# The category names contain `-`, `*`, `(` and `/`, so each one is wrapped in
# backticks; left bare, R would parse them as arithmetic on other objects.
hatecrimes2 <- hatecrimes |>
  select(
    county, year,
    `anti-black`, `anti-white`, `anti-jewish`, `anti-catholic`,
    `anti-age*`, `anti-islamic(muslim)`, `anti-multi-religiousgroups`,
    `anti-gaymale`, `anti-hispanic`, `anti-otherethnicity/nationalorigin`
  ) |>
  group_by(county, year)

head(hatecrimes2)

dim(hatecrimes2)

summary(hatecrimes2)

# ---- Reshape to long format and plot every category over time ----
# Columns 3 to 12 are the ten victim categories selected into hatecrimes2;
# they become (victim_cat, crimecount) pairs, one row per category.
hatelong <- hatecrimes2 |>
  pivot_longer(
    cols = 3:12,
    names_to = "victim_cat",
    values_to = "crimecount"
  )

# One scatter panel per victim category, coloured by category.
hatecrimplot <- hatelong |>
  ggplot(aes(year, crimecount)) +
  geom_point() +
  aes(color = victim_cat) +
  facet_wrap(~victim_cat)
hatecrimplot

# ---- Focus on three victim categories ----
# Keep only the three categories of interest and list the largest counts
# first.
hatenew <- hatelong |>
  filter(victim_cat %in% c("anti-black", "anti-jewish", "anti-gaymale")) |>
  group_by(year, county) |>
  arrange(desc(crimecount))
hatenew

# Incidents per year, bars dodged by hate-crime type.
# geom_col() is the direct spelling of geom_bar(stat = "identity").
plot2 <- hatenew |>
  ggplot() +
  geom_col(
    aes(x = year, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot2

# Same measure, but with county on the x axis.
plot3 <- hatenew |>
  ggplot() +
  geom_col(
    aes(x = county, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot3

# ---- Totals per county and the five highest counties ----
# Total incidents per (year, county). `.groups = "drop_last"` spells out the
# default regrouping of summarize(), so the result stays grouped by year.
counties <- hatenew |>
  group_by(year, county) |>
  summarize(sum = sum(crimecount), .groups = "drop_last") |>
  arrange(desc(sum))

# Total incidents per county across all years; keep the five largest.
counties2 <- hatenew |>
  group_by(county) |>
  summarize(sum = sum(crimecount)) |>
  slice_max(order_by = sum, n = 5)
counties2

# Dodged bars for five hard-coded counties.
# NOTE(review): the county list is typed in by hand, presumably from the
# counties2 output above; confirm that it still matches after any data change.
plot4 <- hatenew |>
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) |>
  ggplot() +
  geom_col(
    aes(x = county, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot4

# ---- Load and reshape the New York population table ----
# NOTE(review): setwd() ties the script to a single machine. It is kept so
# that the relative file name below resolves exactly as before.
setwd("C:/Users/Truly/OneDrive/Documents/Data work")
nypop <- read_csv("newyorkpopulation.csv")

# Reduce Geography to the bare county name: drop a ", New York" suffix
# (tolerating spaces on either side of the comma), drop the word "County",
# then trim leftover whitespace. Cleaning here means the whole long table,
# not only the 2012 subset, carries the bare names.
nypop$Geography <- gsub("\\s*,\\s*New York", "", nypop$Geography)
nypop$Geography <- trimws(gsub("County", "", nypop$Geography, fixed = TRUE))

# Columns 2 to 8 become (year, population) pairs. pivot_longer() replaces the
# superseded gather(); rows now come out county by county rather than year by
# year, which only changes what head() shows, since the steps below filter
# and sort.
nypoplong <- nypop |>
  rename(county = Geography) |>
  pivot_longer(cols = 2:8, names_to = "year", values_to = "population")
# The year arrives as text (a former column name); make it numeric so it can
# be compared with and joined on the numeric year.
nypoplong$year <- as.double(nypoplong$year)
head(nypoplong)

# The ten most populous entries for 2012.
nypoplong12 <- nypoplong |>
  filter(year == 2012) |>
  arrange(desc(population)) |>
  head(10)
nypoplong12

# ---- 2012 hate-crime rate per 100,000 residents ----
# 2012 incident totals per county, largest first.
counties12 <- counties |>
  filter(year == 2012) |>
  arrange(desc(sum))
counties12

# Full join keeps counties that appear in only one of the two tables; those
# rows carry NA for the missing side.
datajoin <- counties12 |>
  full_join(nypoplong12, by = c("county", "year"))
datajoin

# Incidents per 100,000 residents, highest rate first.
datajoinrate <- datajoin |>
  mutate(rate = sum / population * 100000) |>
  arrange(desc(rate))
datajoinrate

# Compact two-column view: county and its rate.
dt <- datajoinrate[, c("county", "rate")]
dt

# ---- Collapse the victim categories into broader groups ----
# Columns 4 to 44 of hatecrimes become (victim_cat, crimecount) pairs.
aggregategroups <- hatecrimes |>
  pivot_longer(
    cols = 4:44,
    names_to = "victim_cat",
    values_to = "crimecount"
  )
unique(aggregategroups$victim_cat)

# Categories counted as anti-LGBTQ; shared by the grouping and the filter
# below so the two lists cannot drift apart.
# The gender-identity name had been corrupted to
# "anti-gendervictim_catendityexpression" and the "(maleandfemale" name had
# lost its closing parenthesis. Extra spellings are listed because a name
# absent from the data is simply ignored by %in%.
# NOTE(review): confirm the exact spellings against the unique() output above.
lgbtq_cats <- c(
  "anti-transgender",
  "anti-gayfemale",
  "anti-genderidendityexpression",
  "anti-genderidentityexpression",
  "anti-gaymale",
  "anti-gay(maleandfemale)",
  "anti-gay(maleandfemale",
  "anti-bisexual"
)

# NOTE(review): any victim_cat not spelled exactly as in the data falls
# through to "others". "anti-multi-racialgroups" is listed under
# "anti-religion" and "anti-black" is in no list; both kept as written,
# confirm that this is intended.
aggregategroups <- aggregategroups |>
  mutate(
    group = case_when(
      victim_cat %in% lgbtq_cats ~ "anti-lgbtq",
      victim_cat %in% c(
        "anti-multi-racialgroups", "anti-jewish", "anti-protestant",
        # Spelling corrected from "religous" to match the column name that
        # the earlier select() step uses.
        "anti-multi-religiousgroups",
        "anti-religiouspracticegenerally", "anti-buddhist", "anti-hindu",
        "anti-mormon", "anti-sikh", "anti-catholic", "anti-islamic(muslim)",
        "anti-atheism/agnosticism", "anti-otherreligion",
        "anti-easternorthodox(greek,russian,etc.)", "anti-jehovahswitness",
        "anti-otherchristian"
      ) ~ "anti-religion",
      victim_cat %in% c(
        "anti-asian", "anti-arab", "anti-non-hispanic", "anti-white",
        "anti-americanindian/alaskannative",
        "anti-nativehawaiian/pacificislander", "anti-otherrace",
        "anti-hispanic", "anti-otherethnicity/nationalorigin"
      ) ~ "anti-ethnicity",
      victim_cat %in% c(
        "anti-physicaldisability", "anti-mentaldisability"
      ) ~ "anti-disability",
      victim_cat %in% c("anti-female", "anti-male") ~ "anti-gender",
      TRUE ~ "others"
    )
  )
aggregategroups

# Long-format rows for the anti-LGBTQ categories only.
lgbtq <- hatecrimes |>
  pivot_longer(
    cols = 4:44,
    names_to = "victim_cat",
    values_to = "crimecount"
  ) |>
  filter(victim_cat %in% lgbtq_cats)
lgbtq

## Some positive aspects of the Hate Crimes Dataset are that it provides
## detailed information about hate crimes, including the type of crime, the
## targeted victim categories, and the location. The dataset also spans
## multiple years and covers different counties in New York, providing the
## opportunity to explore different patterns of hate crimes. A drawback of the
## Hate Crimes Dataset is that it might lack certain variables that could
## provide a better understanding of hate crimes, such as socio-economic
## factors, education levels, or the outcomes of these crimes. One path I
## would take to study this dataset further is to investigate how hate crime
## rates have changed over time and whether there are any connections with
## changes in local or national policies related to discrimination and hate
## crimes. Another path would be to explore the trends of hate crimes over the
## years and determine whether there are significant increases or decreases in
## certain types of hate crimes, along with identifying potential factors
## influencing these trends. One follow-up I would do is create more
## visualizations to improve the clarity of the findings from the datasets.
## Another follow-up could be using geographical mapping tools to visualize
## the distribution of hate crimes across New York counties.