Hate Crimes Dataset

Author

Shalanda Henderson

#Load Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tinytex)

#Set Working Directory and Read-in Hate Crimes Dataset

# Read the hate-crimes CSV through an explicit path rather than changing the
# session's working directory with setwd(); the file location is stated once
# and nothing else in the session is affected by a hidden global side effect.
data_dir <- "/Users/smhenderson/Desktop/DATA110/R/Datasets"
hatecrimes <- read_csv(file.path(data_dir, "hateCrimes2010.csv"))
Rows: 423 Columns: 44
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): County, Crime Type
dbl (42): Year, Anti-Male, Anti-Female, Anti-Transgender, Anti-Gender Identi...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#Data Cleaning, Structure View, and Summary of Data

# Normalise the column names in one pass: lower-case them first, then strip
# every space, so later code can refer to e.g. `crimetype` and `anti-gaymale`.
names(hatecrimes) <- names(hatecrimes) %>%
  tolower() %>%
  gsub(" ", "", .)

# Print the structure (column names, types, first values) of the data frame.
str(hatecrimes)
spc_tbl_ [423 × 44] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ county                                  : chr [1:423] "Albany" "Albany" "Allegany" "Bronx" ...
 $ year                                    : num [1:423] 2016 2016 2016 2016 2016 ...
 $ crimetype                               : chr [1:423] "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
 $ anti-male                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-female                             : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-transgender                        : num [1:423] 0 0 0 4 0 0 0 0 0 0 ...
 $ anti-genderidentityexpression           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-age*                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-white                              : num [1:423] 0 0 0 1 1 0 0 0 0 0 ...
 $ anti-black                              : num [1:423] 1 2 1 0 0 1 0 1 0 2 ...
 $ anti-americanindian/alaskannative       : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-asian                              : num [1:423] 0 0 0 0 0 1 0 0 0 0 ...
 $ anti-nativehawaiian/pacificislander     : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-multi-racialgroups                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-otherrace                          : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-jewish                             : num [1:423] 0 0 0 0 1 0 1 0 0 0 ...
 $ anti-catholic                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-protestant                         : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-islamic(muslim)                    : num [1:423] 1 0 0 6 0 0 0 0 1 0 ...
 $ anti-multi-religiousgroups              : num [1:423] 0 1 0 0 0 0 0 0 0 0 ...
 $ anti-atheism/agnosticism                : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-religiouspracticegenerally         : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-otherreligion                      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-buddhist                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-easternorthodox(greek,russian,etc.): num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-hindu                              : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-jehovahswitness                    : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-mormon                             : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-otherchristian                     : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-sikh                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-hispanic                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-arab                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-otherethnicity/nationalorigin      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-non-hispanic*                      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-gaymale                            : num [1:423] 1 0 0 8 0 1 0 0 0 0 ...
 $ anti-gayfemale                          : num [1:423] 0 0 0 1 0 0 0 0 0 0 ...
 $ anti-gay(maleandfemale)                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-heterosexual                       : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-bisexual                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-physicaldisability                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ anti-mentaldisability                   : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
 $ totalincidents                          : num [1:423] 3 3 1 20 2 3 1 1 1 2 ...
 $ totalvictims                            : num [1:423] 4 3 1 20 2 3 1 1 1 2 ...
 $ totaloffenders                          : num [1:423] 3 3 1 25 2 3 1 1 1 2 ...
 - attr(*, "spec")=
  .. cols(
  ..   County = col_character(),
  ..   Year = col_double(),
  ..   `Crime Type` = col_character(),
  ..   `Anti-Male` = col_double(),
  ..   `Anti-Female` = col_double(),
  ..   `Anti-Transgender` = col_double(),
  ..   `Anti-Gender Identity Expression` = col_double(),
  ..   `Anti-Age*` = col_double(),
  ..   `Anti-White` = col_double(),
  ..   `Anti-Black` = col_double(),
  ..   `Anti-American Indian/Alaskan Native` = col_double(),
  ..   `Anti-Asian` = col_double(),
  ..   `Anti-Native Hawaiian/Pacific Islander` = col_double(),
  ..   `Anti-Multi-Racial Groups` = col_double(),
  ..   `Anti-Other Race` = col_double(),
  ..   `Anti-Jewish` = col_double(),
  ..   `Anti-Catholic` = col_double(),
  ..   `Anti-Protestant` = col_double(),
  ..   `Anti-Islamic (Muslim)` = col_double(),
  ..   `Anti-Multi-Religious Groups` = col_double(),
  ..   `Anti-Atheism/Agnosticism` = col_double(),
  ..   `Anti-Religious Practice Generally` = col_double(),
  ..   `Anti-Other Religion` = col_double(),
  ..   `Anti-Buddhist` = col_double(),
  ..   `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
  ..   `Anti-Hindu` = col_double(),
  ..   `Anti-Jehovahs Witness` = col_double(),
  ..   `Anti-Mormon` = col_double(),
  ..   `Anti-Other Christian` = col_double(),
  ..   `Anti-Sikh` = col_double(),
  ..   `Anti-Hispanic` = col_double(),
  ..   `Anti-Arab` = col_double(),
  ..   `Anti-Other Ethnicity/National Origin` = col_double(),
  ..   `Anti-Non-Hispanic*` = col_double(),
  ..   `Anti-Gay Male` = col_double(),
  ..   `Anti-Gay Female` = col_double(),
  ..   `Anti-Gay (Male and Female)` = col_double(),
  ..   `Anti-Heterosexual` = col_double(),
  ..   `Anti-Bisexual` = col_double(),
  ..   `Anti-Physical Disability` = col_double(),
  ..   `Anti-Mental Disability` = col_double(),
  ..   `Total Incidents` = col_double(),
  ..   `Total Victims` = col_double(),
  ..   `Total Offenders` = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
summary(hatecrimes) # per-column summary: min, quartiles, median, mean, max for numeric columns; length/class/mode for character columns
    county               year       crimetype           anti-male       
 Length:423         Min.   :2010   Length:423         Min.   :0.000000  
 Class :character   1st Qu.:2011   Class :character   1st Qu.:0.000000  
 Mode  :character   Median :2013   Mode  :character   Median :0.000000  
                    Mean   :2013                      Mean   :0.007092  
                    3rd Qu.:2015                      3rd Qu.:0.000000  
                    Max.   :2016                      Max.   :1.000000  
  anti-female      anti-transgender  anti-genderidentityexpression
 Min.   :0.00000   Min.   :0.00000   Min.   :0.00000              
 1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000              
 Median :0.00000   Median :0.00000   Median :0.00000              
 Mean   :0.01655   Mean   :0.04728   Mean   :0.05674              
 3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000              
 Max.   :1.00000   Max.   :5.00000   Max.   :3.00000              
   anti-age*         anti-white        anti-black    
 Min.   :0.00000   Min.   : 0.0000   Min.   : 0.000  
 1st Qu.:0.00000   1st Qu.: 0.0000   1st Qu.: 0.000  
 Median :0.00000   Median : 0.0000   Median : 1.000  
 Mean   :0.05201   Mean   : 0.3357   Mean   : 1.761  
 3rd Qu.:0.00000   3rd Qu.: 0.0000   3rd Qu.: 2.000  
 Max.   :9.00000   Max.   :11.0000   Max.   :18.000  
 anti-americanindian/alaskannative   anti-asian    
 Min.   :0.000000                  Min.   :0.0000  
 1st Qu.:0.000000                  1st Qu.:0.0000  
 Median :0.000000                  Median :0.0000  
 Mean   :0.007092                  Mean   :0.1773  
 3rd Qu.:0.000000                  3rd Qu.:0.0000  
 Max.   :1.000000                  Max.   :8.0000  
 anti-nativehawaiian/pacificislander anti-multi-racialgroups anti-otherrace
 Min.   :0                           Min.   :0.00000         Min.   :0     
 1st Qu.:0                           1st Qu.:0.00000         1st Qu.:0     
 Median :0                           Median :0.00000         Median :0     
 Mean   :0                           Mean   :0.08511         Mean   :0     
 3rd Qu.:0                           3rd Qu.:0.00000         3rd Qu.:0     
 Max.   :0                           Max.   :3.00000         Max.   :0     
  anti-jewish     anti-catholic     anti-protestant   anti-islamic(muslim)
 Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   : 0.0000     
 1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 0.0000     
 Median : 0.000   Median : 0.0000   Median :0.00000   Median : 0.0000     
 Mean   : 3.981   Mean   : 0.2695   Mean   :0.02364   Mean   : 0.4704     
 3rd Qu.: 3.000   3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 0.0000     
 Max.   :82.000   Max.   :12.0000   Max.   :1.00000   Max.   :10.0000     
 anti-multi-religiousgroups anti-atheism/agnosticism
 Min.   : 0.00000           Min.   :0               
 1st Qu.: 0.00000           1st Qu.:0               
 Median : 0.00000           Median :0               
 Mean   : 0.07565           Mean   :0               
 3rd Qu.: 0.00000           3rd Qu.:0               
 Max.   :10.00000           Max.   :0               
 anti-religiouspracticegenerally anti-otherreligion anti-buddhist
 Min.   :0.000000                Min.   :0.000      Min.   :0    
 1st Qu.:0.000000                1st Qu.:0.000      1st Qu.:0    
 Median :0.000000                Median :0.000      Median :0    
 Mean   :0.007092                Mean   :0.104      Mean   :0    
 3rd Qu.:0.000000                3rd Qu.:0.000      3rd Qu.:0    
 Max.   :2.000000                Max.   :4.000      Max.   :0    
 anti-easternorthodox(greek,russian,etc.)   anti-hindu      
 Min.   :0.000000                         Min.   :0.000000  
 1st Qu.:0.000000                         1st Qu.:0.000000  
 Median :0.000000                         Median :0.000000  
 Mean   :0.002364                         Mean   :0.002364  
 3rd Qu.:0.000000                         3rd Qu.:0.000000  
 Max.   :1.000000                         Max.   :1.000000  
 anti-jehovahswitness  anti-mormon anti-otherchristian   anti-sikh
 Min.   :0            Min.   :0    Min.   :0.00000     Min.   :0  
 1st Qu.:0            1st Qu.:0    1st Qu.:0.00000     1st Qu.:0  
 Median :0            Median :0    Median :0.00000     Median :0  
 Mean   :0            Mean   :0    Mean   :0.01655     Mean   :0  
 3rd Qu.:0            3rd Qu.:0    3rd Qu.:0.00000     3rd Qu.:0  
 Max.   :0            Max.   :0    Max.   :3.00000     Max.   :0  
 anti-hispanic       anti-arab       anti-otherethnicity/nationalorigin
 Min.   : 0.0000   Min.   :0.00000   Min.   : 0.0000                   
 1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 0.0000                   
 Median : 0.0000   Median :0.00000   Median : 0.0000                   
 Mean   : 0.3735   Mean   :0.06619   Mean   : 0.2837                   
 3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 0.0000                   
 Max.   :17.0000   Max.   :2.00000   Max.   :19.0000                   
 anti-non-hispanic*  anti-gaymale    anti-gayfemale   anti-gay(maleandfemale)
 Min.   :0          Min.   : 0.000   Min.   :0.0000   Min.   :0.0000         
 1st Qu.:0          1st Qu.: 0.000   1st Qu.:0.0000   1st Qu.:0.0000         
 Median :0          Median : 0.000   Median :0.0000   Median :0.0000         
 Mean   :0          Mean   : 1.499   Mean   :0.2411   Mean   :0.1017         
 3rd Qu.:0          3rd Qu.: 1.000   3rd Qu.:0.0000   3rd Qu.:0.0000         
 Max.   :0          Max.   :36.000   Max.   :8.0000   Max.   :4.0000         
 anti-heterosexual  anti-bisexual      anti-physicaldisability
 Min.   :0.000000   Min.   :0.000000   Min.   :0.00000        
 1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000        
 Median :0.000000   Median :0.000000   Median :0.00000        
 Mean   :0.002364   Mean   :0.004728   Mean   :0.01182        
 3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000        
 Max.   :1.000000   Max.   :1.000000   Max.   :1.00000        
 anti-mentaldisability totalincidents    totalvictims    totaloffenders  
 Min.   :0.000000      Min.   :  1.00   Min.   :  1.00   Min.   :  1.00  
 1st Qu.:0.000000      1st Qu.:  1.00   1st Qu.:  1.00   1st Qu.:  1.00  
 Median :0.000000      Median :  3.00   Median :  3.00   Median :  3.00  
 Mean   :0.009456      Mean   : 10.09   Mean   : 10.48   Mean   : 11.77  
 3rd Qu.:0.000000      3rd Qu.: 10.00   3rd Qu.: 10.00   3rd Qu.: 11.00  
 Max.   :1.000000      Max.   :101.00   Max.   :106.00   Max.   :113.00  

#Select Certain Hate Crimes

# Keep the county/year identifiers plus eight selected bias-motivation count
# columns, group the result by county and year, and preview the first rows.
hatecrimes2 <- group_by(
  select(
    hatecrimes,
    county,
    year,
    all_of(c(
      "anti-black", "anti-white", "anti-jewish", "anti-catholic",
      "anti-age*", "anti-islamic(muslim)", "anti-gaymale", "anti-hispanic"
    ))
  ),
  county,
  year
)
head(hatecrimes2)
# A tibble: 6 × 10
# Groups:   county, year [4]
  county    year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
  <chr>    <dbl>        <dbl>        <dbl>         <dbl>           <dbl>
1 Albany    2016            1            0             0               0
2 Albany    2016            2            0             0               0
3 Allegany  2016            1            0             0               0
4 Bronx     2016            0            1             0               0
5 Bronx     2016            0            1             1               0
6 Broome    2016            1            0             0               0
# ℹ 4 more variables: `anti-age*` <dbl>, `anti-islamic(muslim)` <dbl>,
#   `anti-gaymale` <dbl>, `anti-hispanic` <dbl>

#Check Dimensions and Summary of new dataframe “hatecrimes2”

dim(hatecrimes2) # number of rows (observations) and columns (variables) kept in the subset
[1] 423  10
# Per-column summary statistics for the reduced data frame.
summary(hatecrimes2)
    county               year        anti-black       anti-white     
 Length:423         Min.   :2010   Min.   : 0.000   Min.   : 0.0000  
 Class :character   1st Qu.:2011   1st Qu.: 0.000   1st Qu.: 0.0000  
 Mode  :character   Median :2013   Median : 1.000   Median : 0.0000  
                    Mean   :2013   Mean   : 1.761   Mean   : 0.3357  
                    3rd Qu.:2015   3rd Qu.: 2.000   3rd Qu.: 0.0000  
                    Max.   :2016   Max.   :18.000   Max.   :11.0000  
  anti-jewish     anti-catholic       anti-age*       anti-islamic(muslim)
 Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   : 0.0000     
 1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 0.0000     
 Median : 0.000   Median : 0.0000   Median :0.00000   Median : 0.0000     
 Mean   : 3.981   Mean   : 0.2695   Mean   :0.05201   Mean   : 0.4704     
 3rd Qu.: 3.000   3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 0.0000     
 Max.   :82.000   Max.   :12.0000   Max.   :9.00000   Max.   :10.0000     
  anti-gaymale    anti-hispanic    
 Min.   : 0.000   Min.   : 0.0000  
 1st Qu.: 0.000   1st Qu.: 0.0000  
 Median : 0.000   Median : 0.0000  
 Mean   : 1.499   Mean   : 0.3735  
 3rd Qu.: 1.000   3rd Qu.: 0.0000  
 Max.   :36.000   Max.   :17.0000  

#Convert Dataset from Wide to Long

# Reshape from wide to long. The eight bias-motivation columns (positions 3-10
# of hatecrimes2) are stacked into two columns: `id` holds the former column
# name (the hate-crime type) and `crimecount` holds the value from that cell.
hatecrimeslong <- tidyr::gather(
  hatecrimes2,
  key = "id",
  value = "crimecount",
  3:10
)

#Look at each set of hate crimes for each type for each year

# Scatter plot of counts against year, coloured by hate-crime type and split
# into one panel per type.
hatecrimesplot <- ggplot(
  hatecrimeslong,
  aes(x = year, y = crimecount, color = id)
) +
  geom_point() +
  facet_wrap(vars(id))
hatecrimesplot

#Filter by 3 hate crimes

# Restrict the long table to three hate-crime types, regroup by year and
# county, and list the largest counts first.
hatenew <- hatecrimeslong %>%
  filter(id %in% c("anti-black", "anti-jewish", "anti-gaymale")) %>%
  group_by(year, county) %>%
  arrange(desc(crimecount))
hatenew
# A tibble: 1,269 × 4
# Groups:   year, county [277]
   county   year id          crimecount
   <chr>   <dbl> <chr>            <dbl>
 1 Kings    2012 anti-jewish         82
 2 Kings    2016 anti-jewish         51
 3 Suffolk  2014 anti-jewish         48
 4 Suffolk  2012 anti-jewish         48
 5 Kings    2011 anti-jewish         44
 6 Kings    2013 anti-jewish         41
 7 Kings    2010 anti-jewish         39
 8 Nassau   2011 anti-jewish         38
 9 Suffolk  2013 anti-jewish         37
10 Nassau   2016 anti-jewish         36
# ℹ 1,259 more rows

#Plot 3 Hate Crimes

# Dodged bar chart of the three selected hate-crime types by year; bar heights
# are taken directly from `crimecount`.
plot2 <- ggplot(hatenew) +
  geom_col(
    aes(x = year, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot2

#Bar graphs by county

# Same dodged bar chart as the by-year plot, but with county on the x-axis.
plot3 <- ggplot(hatenew) +
  geom_col(
    mapping = aes(x = county, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot3

# Total the counts of the three selected hate-crime types for every
# county/year pair and sort the largest totals first.
# `.groups = "drop_last"` spells out what summarize() was already doing by
# default (the result stays grouped by county), which makes the grouping of
# the output explicit and silences the "has grouped output by" message.
counties <- hatenew %>%
  group_by(county, year) %>%
  summarize(sum = sum(crimecount), .groups = "drop_last") %>%
  arrange(desc(sum))
`summarise()` has grouped output by 'county'. You can override using the
`.groups` argument.

#Create Barplot for 5 counties

# Dodged bar chart of the three selected hate-crime types for five counties.
# The y-axis label is passed to labs() as `y`; the earlier `ylab = ...` is not
# an axis-label argument of labs(), so the intended label was never applied
# and the axis fell back to the column name.
plot4 <- hatenew %>%
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) %>%
  ggplot() +
  geom_bar(
    aes(x = county, y = crimecount, fill = id),
    position = "dodge",
    stat = "identity"
  ) +
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )

# Display the plot with the title and subtitle centred.
plot4 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

#Load Census Dataset

# Read the New York population CSV through an explicit path rather than
# changing the session's working directory with setwd().
data_dir <- "/Users/smhenderson/Desktop/DATA110/R/Datasets"
nypop <- read_csv(file.path(data_dir, "newyorkpopulation.csv"))
Rows: 62 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Geography
dbl (7): 2010, 2011, 2012, 2013, 2014, 2015, 2016

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#Data Cleaning

# Reduce each geography label to the bare county name so it can be matched
# against the `county` column of the hate-crimes data.
# "County" has to be removed first: only after that does the label contain the
# literal " , New York" that the second pattern targets. Applied in the
# opposite order, the suffix pattern never matches and the suffix stays.
nypop$Geography <- gsub("County", "", nypop$Geography, fixed = TRUE)
nypop$Geography <- gsub(" , New York", "", nypop$Geography, fixed = TRUE)

# Reshape to long form: one row per county and year, with the former year
# columns (positions 2-8) stacked into `year` / `population`.
nypoplong <- nypop %>%
  rename(county = Geography) %>%
  gather("year", "population", 2:8)

# gather() stores the former column names as text; make `year` numeric so it
# has the same type as `year` in the hate-crimes tables.
nypoplong$year <- as.double(nypoplong$year)
head(nypoplong)
# A tibble: 6 × 3
  county                  year population
  <chr>                  <dbl>      <dbl>
1 Albany , New York       2010     304078
2 Allegany , New York     2010      48949
3 Bronx , New York        2010    1388240
4 Broome , New York       2010     200469
5 Cattaraugus , New York  2010      80249
6 Cayuga , New York       2010      79844
# Ten most populous counties in 2012, with any remaining " , New York" suffix
# stripped from the county label.
nypoplong12 <- nypoplong %>%
  filter(year == 2012) %>%
  arrange(desc(population)) %>%
  slice_head(n = 10) %>%
  mutate(county = gsub(" , New York", "", county))
nypoplong12
# A tibble: 10 × 3
   county       year population
   <chr>       <dbl>      <dbl>
 1 Kings        2012    2572282
 2 Queens       2012    2278024
 3 New York     2012    1625121
 4 Suffolk      2012    1499382
 5 Bronx        2012    1414774
 6 Nassau       2012    1350748
 7 Westchester  2012     961073
 8 Erie         2012     920792
 9 Monroe       2012     748947
10 Richmond     2012     470978

#Filter Hate Crimes for 2012

# County totals for 2012 only, largest total first.
counties12 <- arrange(
  filter(counties, year == 2012),
  desc(sum)
)
counties12
# A tibble: 41 × 3
# Groups:   county [41]
   county       year   sum
   <chr>       <dbl> <dbl>
 1 Kings        2012   136
 2 Suffolk      2012    83
 3 New York     2012    71
 4 Nassau       2012    48
 5 Queens       2012    48
 6 Erie         2012    28
 7 Bronx        2012    23
 8 Richmond     2012    18
 9 Multiple     2012    14
10 Westchester  2012    13
# ℹ 31 more rows

#Join hate crime dataset + census dataset

# Combine the 2012 hate-crime totals with the 2012 population figures, keeping
# every row from both tables; rows without a match get NA in the other
# table's columns.
datajoin <- full_join(
  counties12,
  nypoplong12,
  by = c("county", "year")
)
datajoin
# A tibble: 41 × 4
# Groups:   county [41]
   county       year   sum population
   <chr>       <dbl> <dbl>      <dbl>
 1 Kings        2012   136    2572282
 2 Suffolk      2012    83    1499382
 3 New York     2012    71    1625121
 4 Nassau       2012    48    1350748
 5 Queens       2012    48    2278024
 6 Erie         2012    28     920792
 7 Bronx        2012    23    1414774
 8 Richmond     2012    18     470978
 9 Multiple     2012    14         NA
10 Westchester  2012    13     961073
# ℹ 31 more rows

#Calculate the rate of incidents per 100,000 and arrange in descending order

# Incidents per 100,000 residents for each county, highest rate first.
datajoinrate <- arrange(
  mutate(datajoin, rate = sum / population * 100000),
  desc(rate)
)
datajoinrate
# A tibble: 41 × 5
# Groups:   county [41]
   county       year   sum population  rate
   <chr>       <dbl> <dbl>      <dbl> <dbl>
 1 Suffolk      2012    83    1499382 5.54 
 2 Kings        2012   136    2572282 5.29 
 3 New York     2012    71    1625121 4.37 
 4 Richmond     2012    18     470978 3.82 
 5 Nassau       2012    48    1350748 3.55 
 6 Erie         2012    28     920792 3.04 
 7 Queens       2012    48    2278024 2.11 
 8 Bronx        2012    23    1414774 1.63 
 9 Westchester  2012    13     961073 1.35 
10 Monroe       2012     5     748947 0.668
# ℹ 31 more rows
# Keep only the county name and its rate for a compact table.
dt <- select(datajoinrate, county, rate)
dt
# A tibble: 41 × 2
# Groups:   county [41]
   county       rate
   <chr>       <dbl>
 1 Suffolk     5.54 
 2 Kings       5.29 
 3 New York    4.37 
 4 Richmond    3.82 
 5 Nassau      3.55 
 6 Erie        3.04 
 7 Queens      2.11 
 8 Bronx       1.63 
 9 Westchester 1.35 
10 Monroe      0.668
# ℹ 31 more rows

#Aggregate some categories

# Stack every column except the three identifiers (county, year, crimetype)
# into `id` / `crimecount`, then list the distinct `id` values.
aggregategroups <- tidyr::gather(
  hatecrimes,
  key = "id",
  value = "crimecount",
  -county, -year, -crimetype
)
unique(aggregategroups$id)
 [1] "anti-male"                               
 [2] "anti-female"                             
 [3] "anti-transgender"                        
 [4] "anti-genderidentityexpression"           
 [5] "anti-age*"                               
 [6] "anti-white"                              
 [7] "anti-black"                              
 [8] "anti-americanindian/alaskannative"       
 [9] "anti-asian"                              
[10] "anti-nativehawaiian/pacificislander"     
[11] "anti-multi-racialgroups"                 
[12] "anti-otherrace"                          
[13] "anti-jewish"                             
[14] "anti-catholic"                           
[15] "anti-protestant"                         
[16] "anti-islamic(muslim)"                    
[17] "anti-multi-religiousgroups"              
[18] "anti-atheism/agnosticism"                
[19] "anti-religiouspracticegenerally"         
[20] "anti-otherreligion"                      
[21] "anti-buddhist"                           
[22] "anti-easternorthodox(greek,russian,etc.)"
[23] "anti-hindu"                              
[24] "anti-jehovahswitness"                    
[25] "anti-mormon"                             
[26] "anti-otherchristian"                     
[27] "anti-sikh"                               
[28] "anti-hispanic"                           
[29] "anti-arab"                               
[30] "anti-otherethnicity/nationalorigin"      
[31] "anti-non-hispanic*"                      
[32] "anti-gaymale"                            
[33] "anti-gayfemale"                          
[34] "anti-gay(maleandfemale)"                 
[35] "anti-heterosexual"                       
[36] "anti-bisexual"                           
[37] "anti-physicaldisability"                 
[38] "anti-mentaldisability"                   
[39] "totalincidents"                          
[40] "totalvictims"                            
[41] "totaloffenders"                          
# Assign every hate-crime type (`id`) to a broader category in `group`.
# The ids must match the cleaned column names exactly, otherwise the row
# silently falls through to "others". Corrected here:
#   - "anti-genderidentityexpression", "anti-gay(maleandfemale)",
#     "anti-multi-religiousgroups" and "anti-non-hispanic*" were misspelled
#     and therefore never matched;
#   - "anti-black" was not listed in any category;
#   - "anti-multi-racialgroups" was filed under religion instead of with the
#     other race/ethnicity types.
# Ids not listed below (e.g. "anti-age*", "anti-heterosexual" and the three
# total* columns) are labelled "others".
aggregategroups <- aggregategroups %>%
  mutate(group = case_when(
    id %in% c(
      "anti-transgender", "anti-gayfemale", "anti-genderidentityexpression",
      "anti-gaymale", "anti-gay(maleandfemale)", "anti-bisexual"
    ) ~ "anti-lgbtq",
    id %in% c(
      "anti-jewish", "anti-protestant", "anti-multi-religiousgroups",
      "anti-religiouspracticegenerally", "anti-buddhist", "anti-hindu",
      "anti-mormon", "anti-sikh", "anti-catholic", "anti-islamic(muslim)",
      "anti-atheism/agnosticism", "anti-otherreligion",
      "anti-easternorthodox(greek,russian,etc.)", "anti-jehovahswitness",
      "anti-otherchristian"
    ) ~ "anti-religion",
    id %in% c(
      "anti-asian", "anti-arab", "anti-non-hispanic*", "anti-white",
      "anti-black", "anti-multi-racialgroups",
      "anti-americanindian/alaskannative",
      "anti-nativehawaiian/pacificislander", "anti-otherrace",
      "anti-hispanic", "anti-otherethnicity/nationalorigin"
    ) ~ "anti-ethnicity",
    id %in% c("anti-physicaldisability", "anti-mentaldisability") ~
      "anti-disability",
    id %in% c("anti-female", "anti-male") ~ "anti-gender",
    TRUE ~ "others"
  ))
aggregategroups
# A tibble: 17,343 × 6
   county    year crimetype              id        crimecount group      
   <chr>    <dbl> <chr>                  <chr>          <dbl> <chr>      
 1 Albany    2016 Crimes Against Persons anti-male          0 anti-gender
 2 Albany    2016 Property Crimes        anti-male          0 anti-gender
 3 Allegany  2016 Property Crimes        anti-male          0 anti-gender
 4 Bronx     2016 Crimes Against Persons anti-male          0 anti-gender
 5 Bronx     2016 Property Crimes        anti-male          0 anti-gender
 6 Broome    2016 Crimes Against Persons anti-male          0 anti-gender
 7 Cayuga    2016 Property Crimes        anti-male          0 anti-gender
 8 Chemung   2016 Crimes Against Persons anti-male          0 anti-gender
 9 Chemung   2016 Property Crimes        anti-male          0 anti-gender
10 Chenango  2016 Crimes Against Persons anti-male          0 anti-gender
# ℹ 17,333 more rows

#Create LGBTQ subset

# Long-format subset containing only the LGBTQ-related hate-crime types.
# The ids must match the cleaned column names exactly: the earlier spellings
# "anti-genderidendityexpression" and "anti-gay(maleandfemale" matched no
# column, so those two types were silently left out of the subset.
lgbtq <- hatecrimes %>%
  tidyr::gather("id", "crimecount", 4:44) %>%
  filter(id %in% c(
    "anti-transgender", "anti-gayfemale", "anti-genderidentityexpression",
    "anti-gaymale", "anti-gay(maleandfemale)", "anti-bisexual"
  ))
lgbtq
# A tibble: 1,692 × 5
   county    year crimetype              id               crimecount
   <chr>    <dbl> <chr>                  <chr>                 <dbl>
 1 Albany    2016 Crimes Against Persons anti-transgender          0
 2 Albany    2016 Property Crimes        anti-transgender          0
 3 Allegany  2016 Property Crimes        anti-transgender          0
 4 Bronx     2016 Crimes Against Persons anti-transgender          4
 5 Bronx     2016 Property Crimes        anti-transgender          0
 6 Broome    2016 Crimes Against Persons anti-transgender          0
 7 Cayuga    2016 Property Crimes        anti-transgender          0
 8 Chemung   2016 Crimes Against Persons anti-transgender          0
 9 Chemung   2016 Property Crimes        anti-transgender          0
10 Chenango  2016 Crimes Against Persons anti-transgender          0
# ℹ 1,682 more rows

#Positive and negative aspects of this dataset. ##Positive: specific in the types of hate crimes, which can allow a researcher to take a deeper look into a hate crime of interest. For example, instead of grouping all religions together, the dataset lists the different religions. ##Negative: the different hate crime types are variable names instead of being collapsed under one variable name

#List 2 different paths you would like to (hypothetically) study about this dataset. ##Look at the number of offenders by counties, especially if this is indicating where they reside ##Compare the different crime types and identify any trends, such as a particular group reporting more of a specific crime type vs other groups or if there’s no significant difference ##Look at the victim to offenders ratio for different groups

#Describe 2 things you would do to follow up after seeing these results. ##Look at the number of hate crimes by the aggregated groups ##See if the LGBTQ group was more likely to have crimes against persons or property