# Aaron HW for DATA 110
library(tidyverse)
#tinytex::install_tinytex()
#library(tinytex)
# Load the hate-crime counts and normalise the column names.
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the script will only run as-is on a machine that has this folder.
setwd("C:/Users/Truly/OneDrive/Documents/Data work")
hatecrimes <- read_csv("hateCrimes2010.csv")

# Lower-case every column name and strip spaces so that later steps can
# refer to columns with one consistent spelling ("Some Name" -> "somename").
names(hatecrimes) <- tolower(names(hatecrimes))
names(hatecrimes) <- gsub(" ", "", names(hatecrimes))

# Quick look at the data.
head(hatecrimes)
summary(hatecrimes)
# Keep county, year and ten victim-category columns, grouped by county/year.
# The category names contain "-", "*", "(" and "/", so they must be wrapped in
# backticks; unquoted they would be parsed as arithmetic on missing objects.
hatecrimes2 <- hatecrimes |>
  select(
    county, year,
    `anti-black`, `anti-white`, `anti-jewish`, `anti-catholic`,
    `anti-age*`, `anti-islamic(muslim)`, `anti-multi-religiousgroups`,
    `anti-gaymale`, `anti-hispanic`, `anti-otherethnicity/nationalorigin`
  ) |>
  group_by(county, year)

# Inspect the reduced table.
head(hatecrimes2)
dim(hatecrimes2)
summary(hatecrimes2)
# Reshape to long form: one row per county / year / victim category.
# Every column except the two identifiers is pivoted, which is the same set
# as positions 3:12 of the 12-column hatecrimes2 table.
hatelong <- hatecrimes2 |>
  pivot_longer(
    cols = !c(county, year),
    names_to = "victim_cat",
    values_to = "crimecount"
  )

# Scatter plot of counts per year, one facet (and colour) per category.
hatecrimplot <- hatelong |>
  ggplot(aes(x = year, y = crimecount, color = victim_cat)) +
  geom_point() +
  facet_wrap(~victim_cat)
hatecrimplot
# Restrict to three victim categories and sort by descending count.
hatenew <- hatelong |>
  filter(victim_cat %in% c("anti-black", "anti-jewish", "anti-gaymale")) |>
  group_by(year, county) |>
  arrange(desc(crimecount))
hatenew

# Dodged bars of incident counts by year, filled by category.
# geom_col() is the idiomatic equivalent of geom_bar(stat = "identity").
plot2 <- hatenew |>
  ggplot() +
  geom_col(
    aes(x = year, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot2

# Same chart with county on the x-axis instead of year.
plot3 <- hatenew |>
  ggplot() +
  geom_col(
    aes(x = county, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    fill = "Hate Crime Type",
    y = "Number of Hate Crime Incidents",
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot3
# Total incidents (over the three categories in hatenew) per year and county,
# largest first. `.groups = "drop_last"` spells out summarise()'s default
# regrouping (result stays grouped by year) and silences the message.
counties <- hatenew |>
  group_by(year, county) |>
  summarize(sum = sum(crimecount), .groups = "drop_last") |>
  arrange(desc(sum))

# The five counties with the largest totals across all years.
counties2 <- hatenew |>
  group_by(county) |>
  summarize(sum = sum(crimecount)) |>
  slice_max(order_by = sum, n = 5)
counties2

# Dodged bars for five hard-coded counties.
# NOTE(review): presumably these are the counties listed in counties2;
# confirm they still match if the input data changes.
plot4 <- hatenew |>
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) |>
  ggplot() +
  geom_col(
    aes(x = county, y = crimecount, fill = victim_cat),
    position = "dodge"
  ) +
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type",
    caption = "Source: NY State Division of Criminal Justice Services"
  )
plot4
# Load the county population table.
# NOTE(review): hard-coded, machine-specific working directory.
setwd("C:/Users/Truly/OneDrive/Documents/Data work")
nypop <- read_csv("newyorkpopulation.csv")

# Reduce Geography to the bare county name. "County" is removed first so that
# the " , New York" pattern can match what is left behind; in the opposite
# order the second pattern only matches after a later re-clean.
# NOTE(review): assumes values look like "<Name> County, New York" -- confirm
# against the file.
nypop$Geography <- gsub("County", "", nypop$Geography)
nypop$Geography <- gsub(" , New York", "", nypop$Geography)
nypop$Geography <- trimws(nypop$Geography)

# Long form: one row per county and year. Columns 2:8 hold the yearly
# populations, and their names become the `year` values. pivot_longer()
# replaces the superseded gather(); it returns the same rows in a different
# order, which only affects the head() preview below.
nypoplong <- nypop |>
  rename(county = Geography) |>
  pivot_longer(cols = 2:8, names_to = "year", values_to = "population") |>
  mutate(year = as.double(year))
head(nypoplong)

# The ten most populous counties in 2012.
nypoplong12 <- nypoplong |>
  filter(year == 2012) |>
  arrange(desc(population)) |>
  head(10)
# Kept from the original script; a no-op once the names are cleaned above.
nypoplong12$county <- gsub(" , New York", "", nypoplong12$county)
nypoplong12
# 2012 incident totals per county, largest first.
counties12 <- counties |>
  filter(year == 2012) |>
  arrange(desc(sum))
counties12

# Combine incident totals with population. A full join keeps rows that are
# present in only one of the two tables, so unmatched rows carry NA values.
datajoin <- counties12 |>
  full_join(nypoplong12, by = c("county", "year"))
datajoin

# Incidents per 100,000 residents, highest rate first.
datajoinrate <- datajoin |>
  mutate(rate = sum / population * 100000) |>
  arrange(desc(rate))
datajoinrate

# Just the county name and its rate.
dt <- datajoinrate[, c("county", "rate")]
dt
# Reshape columns 4 through 44 of the full table into long form.
aggregategroups <- hatecrimes |>
  pivot_longer(
    cols = 4:44,
    names_to = "victim_cat",
    values_to = "crimecount"
  )
unique(aggregategroups$victim_cat)

# Category names that make up each broader group.
# NOTE(review): three strings were corrected because the originals looked
# malformed and would never match: "anti-gendervictim_catendityexpression"
# (find/replace artifact), "anti-gay(maleandfemale" (missing ")") and
# "anti-multi-religousgroups" (spelled "religious" where the column is
# selected earlier in this script). Verify every entry against the unique()
# output above.
lgbtq_cats <- c(
  "anti-transgender", "anti-gayfemale", "anti-genderidentityexpression",
  "anti-gaymale", "anti-gay(maleandfemale)", "anti-bisexual"
)
# NOTE(review): "anti-multi-racialgroups" is listed under religion, as in the
# original script; confirm that this is intended.
religion_cats <- c(
  "anti-multi-racialgroups", "anti-jewish", "anti-protestant",
  "anti-multi-religiousgroups", "anti-religiouspracticegenerally",
  "anti-buddhist", "anti-hindu", "anti-mormon", "anti-sikh", "anti-catholic",
  "anti-islamic(muslim)", "anti-atheism/agnosticism", "anti-otherreligion",
  "anti-easternorthodox(greek,russian,etc.)", "anti-jehovahswitness",
  "anti-otherchristian"
)
ethnicity_cats <- c(
  "anti-asian", "anti-arab", "anti-non-hispanic", "anti-white",
  "anti-americanindian/alaskannative", "anti-nativehawaiian/pacificislander",
  "anti-otherrace", "anti-hispanic", "anti-otherethnicity/nationalorigin"
)
disability_cats <- c("anti-physicaldisability", "anti-mentaldisability")
gender_cats <- c("anti-female", "anti-male")

# LGBTQ-related rows only, taken from the long table before the `group`
# column is added, so the columns match a fresh pivot of hatecrimes.
lgbtq_rows <- aggregategroups |>
  filter(victim_cat %in% lgbtq_cats)

# Label every row with its broader group; names that are in none of the
# lists fall through to "others".
aggregategroups <- aggregategroups |>
  mutate(group = case_when(
    victim_cat %in% lgbtq_cats ~ "anti-lgbtq",
    victim_cat %in% religion_cats ~ "anti-religion",
    victim_cat %in% ethnicity_cats ~ "anti-ethnicity",
    victim_cat %in% disability_cats ~ "anti-disability",
    victim_cat %in% gender_cats ~ "anti-gender",
    TRUE ~ "others"
  ))
aggregategroups

lgbtq <- lgbtq_rows
lgbtq
## Some positive aspects of the Hate Crimes Dataset are that it provides
## detailed information about hate crimes, including the type of crime, the
## targeted victim categories, and the location. The dataset also spans
## multiple years and covers different counties in New York, providing the
## opportunity to explore different patterns of hate crimes. A drawback of the
## Hate Crimes Dataset is that it might lack certain variables that could
## provide a better understanding of hate crimes, such as socio-economic
## factors, education levels, or the outcomes of these crimes. One path I would
## take to study this dataset further is to investigate how hate crime rates
## have changed over time and whether there are any connections with changes in
## local or national policies related to discrimination and hate crimes.
## Another path would be to explore the trends of hate crimes over the years,
## determine whether there are significant increases or decreases in certain
## types of hate crimes, and identify potential factors influencing these
## trends. One follow-up I would do is create more visualizations to improve
## the clarity of the findings from the datasets. Another follow-up could be
## using geographical mapping tools to visualize the distribution of hate
## crimes across New York counties.