library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Switch the session to the data folder, then load the hate-crime CSV from it.
# NOTE(review): setwd() with an absolute, user-specific path only works on the
# author's machine; a project-relative path would be more portable.
setwd(dir = "/Users/tiffanyking/Desktop/Data 110")
hatecrimes <- read_csv(file = "hateCrimes2010.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   County = col_character(),
##   `Crime Type` = col_character()
## )
## See spec(...) for full column specifications.

Clean up the data:

# Normalise the column names in one pass: lower-case them and strip every
# space (e.g. "Crime Type" becomes "crimetype"), then inspect the structure.
hatecrimes <- setNames(
  hatecrimes,
  gsub(" ", "", tolower(names(hatecrimes)), fixed = TRUE)
)
str(hatecrimes)
## tibble [423 × 44] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ county                                  : chr [1:423] "Albany" "Albany" "Allegany" "Bronx" ...
##  $ year                                    : num [1:423] 2016 2016 2016 2016 2016 ...
##  $ crimetype                               : chr [1:423] "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
##  $ anti-male                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-female                             : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-transgender                        : num [1:423] 0 0 0 4 0 0 0 0 0 0 ...
##  $ anti-genderidentityexpression           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-age*                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-white                              : num [1:423] 0 0 0 1 1 0 0 0 0 0 ...
##  $ anti-black                              : num [1:423] 1 2 1 0 0 1 0 1 0 2 ...
##  $ anti-americanindian/alaskannative       : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-asian                              : num [1:423] 0 0 0 0 0 1 0 0 0 0 ...
##  $ anti-nativehawaiian/pacificislander     : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-multi-racialgroups                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherrace                          : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-jewish                             : num [1:423] 0 0 0 0 1 0 1 0 0 0 ...
##  $ anti-catholic                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-protestant                         : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-islamic(muslim)                    : num [1:423] 1 0 0 6 0 0 0 0 1 0 ...
##  $ anti-multi-religiousgroups              : num [1:423] 0 1 0 0 0 0 0 0 0 0 ...
##  $ anti-atheism/agnosticism                : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-religiouspracticegenerally         : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherreligion                      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-buddhist                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-easternorthodox(greek,russian,etc.): num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-hindu                              : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-jehovahswitness                    : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-mormon                             : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherchristian                     : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-sikh                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-hispanic                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-arab                               : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherethnicity/nationalorigin      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-non-hispanic*                      : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-gaymale                            : num [1:423] 1 0 0 8 0 1 0 0 0 0 ...
##  $ anti-gayfemale                          : num [1:423] 0 0 0 1 0 0 0 0 0 0 ...
##  $ anti-gay(maleandfemale)                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-heterosexual                       : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-bisexual                           : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-physicaldisability                 : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-mentaldisability                   : num [1:423] 0 0 0 0 0 0 0 0 0 0 ...
##  $ totalincidents                          : num [1:423] 3 3 1 20 2 3 1 1 1 2 ...
##  $ totalvictims                            : num [1:423] 4 3 1 20 2 3 1 1 1 2 ...
##  $ totaloffenders                          : num [1:423] 3 3 1 25 2 3 1 1 1 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   County = col_character(),
##   ..   Year = col_double(),
##   ..   `Crime Type` = col_character(),
##   ..   `Anti-Male` = col_double(),
##   ..   `Anti-Female` = col_double(),
##   ..   `Anti-Transgender` = col_double(),
##   ..   `Anti-Gender Identity Expression` = col_double(),
##   ..   `Anti-Age*` = col_double(),
##   ..   `Anti-White` = col_double(),
##   ..   `Anti-Black` = col_double(),
##   ..   `Anti-American Indian/Alaskan Native` = col_double(),
##   ..   `Anti-Asian` = col_double(),
##   ..   `Anti-Native Hawaiian/Pacific Islander` = col_double(),
##   ..   `Anti-Multi-Racial Groups` = col_double(),
##   ..   `Anti-Other Race` = col_double(),
##   ..   `Anti-Jewish` = col_double(),
##   ..   `Anti-Catholic` = col_double(),
##   ..   `Anti-Protestant` = col_double(),
##   ..   `Anti-Islamic (Muslim)` = col_double(),
##   ..   `Anti-Multi-Religious Groups` = col_double(),
##   ..   `Anti-Atheism/Agnosticism` = col_double(),
##   ..   `Anti-Religious Practice Generally` = col_double(),
##   ..   `Anti-Other Religion` = col_double(),
##   ..   `Anti-Buddhist` = col_double(),
##   ..   `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
##   ..   `Anti-Hindu` = col_double(),
##   ..   `Anti-Jehovahs Witness` = col_double(),
##   ..   `Anti-Mormon` = col_double(),
##   ..   `Anti-Other Christian` = col_double(),
##   ..   `Anti-Sikh` = col_double(),
##   ..   `Anti-Hispanic` = col_double(),
##   ..   `Anti-Arab` = col_double(),
##   ..   `Anti-Other Ethnicity/National Origin` = col_double(),
##   ..   `Anti-Non-Hispanic*` = col_double(),
##   ..   `Anti-Gay Male` = col_double(),
##   ..   `Anti-Gay Female` = col_double(),
##   ..   `Anti-Gay (Male and Female)` = col_double(),
##   ..   `Anti-Heterosexual` = col_double(),
##   ..   `Anti-Bisexual` = col_double(),
##   ..   `Anti-Physical Disability` = col_double(),
##   ..   `Anti-Mental Disability` = col_double(),
##   ..   `Total Incidents` = col_double(),
##   ..   `Total Victims` = col_double(),
##   ..   `Total Offenders` = col_double()
##   .. )

Select only certain hate-crimes

# Keep the two key columns plus eight bias categories of interest, group the
# result by county and year, and preview the first rows.
hatecrimes2 <- hatecrimes %>%
  select(
    county,
    year,
    `anti-black`,
    `anti-white`,
    `anti-jewish`,
    `anti-catholic`,
    `anti-age*`,
    `anti-islamic(muslim)`,
    `anti-gaymale`,
    `anti-hispanic`
  ) %>%
  group_by(county, year)
head(hatecrimes2)
## # A tibble: 6 x 10
## # Groups:   county, year [4]
##   county  year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
##   <chr>  <dbl>        <dbl>        <dbl>         <dbl>           <dbl>
## 1 Albany  2016            1            0             0               0
## 2 Albany  2016            2            0             0               0
## 3 Alleg…  2016            1            0             0               0
## 4 Bronx   2016            0            1             0               0
## 5 Bronx   2016            0            1             1               0
## 6 Broome  2016            1            0             0               0
## # … with 4 more variables: `anti-age*` <dbl>, `anti-islamic(muslim)` <dbl>,
## #   `anti-gaymale` <dbl>, `anti-hispanic` <dbl>

Check the dimensions and the summary to make sure there are no missing values

# Report the number of rows followed by the number of columns.
c(nrow(hatecrimes2), ncol(hatecrimes2))
## [1] 423  10

There are currently 10 variables with 423 rows.

# Per-column summary statistics for the selected columns.
hatecrimes2 %>% summary()
##     county               year        anti-black       anti-white     
##  Length:423         Min.   :2010   Min.   : 0.000   Min.   : 0.0000  
##  Class :character   1st Qu.:2011   1st Qu.: 0.000   1st Qu.: 0.0000  
##  Mode  :character   Median :2013   Median : 1.000   Median : 0.0000  
##                     Mean   :2013   Mean   : 1.761   Mean   : 0.3357  
##                     3rd Qu.:2015   3rd Qu.: 2.000   3rd Qu.: 0.0000  
##                     Max.   :2016   Max.   :18.000   Max.   :11.0000  
##   anti-jewish     anti-catholic       anti-age*       anti-islamic(muslim)
##  Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   : 0.0000     
##  1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 0.0000     
##  Median : 0.000   Median : 0.0000   Median :0.00000   Median : 0.0000     
##  Mean   : 3.981   Mean   : 0.2695   Mean   :0.05201   Mean   : 0.4704     
##  3rd Qu.: 3.000   3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 0.0000     
##  Max.   :82.000   Max.   :12.0000   Max.   :9.00000   Max.   :10.0000     
##   anti-gaymale    anti-hispanic    
##  Min.   : 0.000   Min.   : 0.0000  
##  1st Qu.: 0.000   1st Qu.: 0.0000  
##  Median : 0.000   Median : 0.0000  
##  Mean   : 1.499   Mean   : 0.3735  
##  3rd Qu.: 1.000   3rd Qu.: 0.0000  
##  Max.   :36.000   Max.   :17.0000

install.packages("reshape2")

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Reshape to long format: columns 3 to 10 (the bias categories) are stacked so
# that each original row contributes one row per category, with the category
# name stored in `id` and its count in `crimecount`.
hatecrimeslong <- tidyr::gather(
  hatecrimes2,
  key = "id",
  value = "crimecount",
  3:10
)

# Scatter of counts by year, coloured by bias category, with one facet per
# category.
hatecrimesplot <- ggplot(
  hatecrimeslong,
  aes(x = year, y = crimecount, color = id)
) +
  geom_point() +
  facet_wrap(~id)
hatecrimesplot

## Look deeper into crimes against blacks, gay males, and jews

# Restrict the long table to the three bias categories under study, regroup by
# year and county, and list the largest counts first.
hatenew <- hatecrimeslong %>%
  filter(id %in% c("anti-black", "anti-jewish", "anti-gaymale")) %>%
  group_by(year, county) %>%
  arrange(desc(crimecount))
hatenew
## # A tibble: 1,269 x 4
## # Groups:   year, county [277]
##    county   year id          crimecount
##    <chr>   <dbl> <chr>            <dbl>
##  1 Kings    2012 anti-jewish         82
##  2 Kings    2016 anti-jewish         51
##  3 Suffolk  2014 anti-jewish         48
##  4 Suffolk  2012 anti-jewish         48
##  5 Kings    2011 anti-jewish         44
##  6 Kings    2013 anti-jewish         41
##  7 Kings    2010 anti-jewish         39
##  8 Nassau   2011 anti-jewish         38
##  9 Suffolk  2013 anti-jewish         37
## 10 Nassau   2016 anti-jewish         36
## # … with 1,259 more rows

Plot these three types of hate crimes together

# Dodged bars of the three selected bias categories by year.
# NOTE(review): hatenew holds many rows per year/id pair, so these dodged bars
# are likely drawn on top of one another rather than summed -- confirm that
# the tallest single count is what the chart is meant to show.
plot2 <- ggplot(hatenew, aes(x = year, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot2

## What about the counties?

# Same comparison as the yearly chart, but with county on the x-axis.
# NOTE(review): hatenew holds many rows per county/id pair, so these dodged
# bars are likely drawn on top of one another rather than summed -- confirm
# that this is intended.
plot3 <- ggplot(hatenew, aes(x = county, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot3

# Add up the selected bias categories for every county/year pair and rank the
# pairs from the highest total to the lowest. The total is stored in a column
# named `sum`.
counties <- hatenew %>%
  group_by(county, year) %>%
  summarise(sum = sum(crimecount)) %>%
  arrange(desc(sum))
## `summarise()` regrouping output by 'county' (override with `.groups` argument)
counties
## # A tibble: 277 x 3
## # Groups:   county [60]
##    county    year   sum
##    <chr>    <dbl> <dbl>
##  1 Kings     2012   136
##  2 Kings     2010   110
##  3 Kings     2016   101
##  4 Kings     2013    96
##  5 Kings     2014    94
##  6 Kings     2015    90
##  7 Kings     2011    86
##  8 New York  2016    86
##  9 Suffolk   2012    83
## 10 New York  2013    75
## # … with 267 more rows
# Compare the three selected bias categories across five counties: Kings,
# New York, Suffolk, Nassau and Queens.
plot4 <- hatenew %>%
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) %>%
  ggplot() +
  geom_bar(aes(x = county, y = crimecount, fill = id),
    position = "dodge", stat = "identity"
  ) +
  # labs() is keyed by aesthetic name, so the y-axis title must be passed as
  # `y`. The earlier `ylab = ...` argument matched no aesthetic and was
  # silently ignored, leaving the axis with its default "crimecount" label.
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )
plot4

How would these comparisons change if hate crimes in each county and year were measured relative to the county's population?

# Switch the session to the data folder, then load the population CSV from it.
# NOTE(review): setwd() with an absolute, user-specific path only works on the
# author's machine; a project-relative path would be more portable.
setwd(dir = "/Users/tiffanyking/Desktop/Data 110")
nypop <- read_csv(file = "newyorkpopulation.csv")
## Parsed with column specification:
## cols(
##   Geography = col_character(),
##   `2010` = col_double(),
##   `2011` = col_double(),
##   `2012` = col_double(),
##   `2013` = col_double(),
##   `2014` = col_double(),
##   `2015` = col_double(),
##   `2016` = col_double()
## )
# Reduce the Geography labels to the bare county name so they can be matched
# against the hate-crime data. Judging by the previously printed result
# ("Albany , New York"), the raw values look like "<name> County, New York".
# "County" therefore has to be stripped first: only afterwards does the text
# contain the " , New York" suffix that the second pattern removes. In the
# opposite order that pattern never matched and the suffix was left in place.
nypop$Geography <- gsub("County", "", nypop$Geography)
nypop$Geography <- gsub(" , New York", "", nypop$Geography)
# Reshape columns 2 to 8 (the yearly columns) into one row per county and
# year, and store the year as a number instead of a column-name string.
nypoplong <- nypop %>%
  rename(county = Geography) %>%
  gather("year", "population", 2:8)
nypoplong$year <- as.double(nypoplong$year)
head(nypoplong)
## # A tibble: 6 x 3
##   county       year population
##   <chr>       <dbl>      <dbl>
## 1 Albany       2010     304078
## 2 Allegany     2010      48949
## 3 Bronx        2010    1388240
## 4 Broome       2010     200469
## 5 Cattaraugus  2010      80249
## 6 Cayuga       2010      79844

Focus on 2012

# Take the 2012 population figures, keep the ten most populous counties, and
# strip any remaining " , New York" suffix from the county labels.
nypoplong12 <- nypoplong %>%
  filter(year == 2012) %>%
  arrange(desc(population)) %>%
  head(10) %>%
  mutate(county = gsub(" , New York", "", county))
nypoplong12
## # A tibble: 10 x 3
##    county       year population
##    <chr>       <dbl>      <dbl>
##  1 Kings        2012    2572282
##  2 Queens       2012    2278024
##  3 New York     2012    1625121
##  4 Suffolk      2012    1499382
##  5 Bronx        2012    1414774
##  6 Nassau       2012    1350748
##  7 Westchester  2012     961073
##  8 Erie         2012     920792
##  9 Monroe       2012     748947
## 10 Richmond     2012     470978

Hate Crime 2012

# County totals for 2012 only, largest total first.
counties12 <- filter(counties, year == 2012) %>%
  arrange(desc(sum))
counties12
## # A tibble: 41 x 3
## # Groups:   county [41]
##    county       year   sum
##    <chr>       <dbl> <dbl>
##  1 Kings        2012   136
##  2 Suffolk      2012    83
##  3 New York     2012    71
##  4 Nassau       2012    48
##  5 Queens       2012    48
##  6 Erie         2012    28
##  7 Bronx        2012    23
##  8 Richmond     2012    18
##  9 Multiple     2012    14
## 10 Westchester  2012    13
## # … with 31 more rows

Join the Hate Crimes data with NY population data for 2012

# Attach the 2012 population figures to the 2012 county totals. A full join
# keeps every row from both tables, so counties without a population match
# get NA in `population`.
datajoin <- full_join(
  counties12,
  nypoplong12,
  by = c("county", "year")
)
datajoin
## # A tibble: 41 x 4
## # Groups:   county [41]
##    county       year   sum population
##    <chr>       <dbl> <dbl>      <dbl>
##  1 Kings        2012   136    2572282
##  2 Suffolk      2012    83    1499382
##  3 New York     2012    71    1625121
##  4 Nassau       2012    48    1350748
##  5 Queens       2012    48    2278024
##  6 Erie         2012    28     920792
##  7 Bronx        2012    23    1414774
##  8 Richmond     2012    18     470978
##  9 Multiple     2012    14         NA
## 10 Westchester  2012    13     961073
## # … with 31 more rows
# Express each county's total as incidents per 100,000 residents and rank the
# counties by that rate, highest first.
datajoinrate <- mutate(datajoin, rate = sum / population * 1e5) %>%
  arrange(desc(rate))
datajoinrate
## # A tibble: 41 x 5
## # Groups:   county [41]
##    county       year   sum population  rate
##    <chr>       <dbl> <dbl>      <dbl> <dbl>
##  1 Suffolk      2012    83    1499382 5.54 
##  2 Kings        2012   136    2572282 5.29 
##  3 New York     2012    71    1625121 4.37 
##  4 Richmond     2012    18     470978 3.82 
##  5 Nassau       2012    48    1350748 3.55 
##  6 Erie         2012    28     920792 3.04 
##  7 Queens       2012    48    2278024 2.11 
##  8 Bronx        2012    23    1414774 1.63 
##  9 Westchester  2012    13     961073 1.35 
## 10 Monroe       2012     5     748947 0.668
## # … with 31 more rows
# Keep only the county label and its rate for the final table.
dt <- select(datajoinrate, county, rate)
dt
## # A tibble: 41 x 2
## # Groups:   county [41]
##    county       rate
##    <chr>       <dbl>
##  1 Suffolk     5.54 
##  2 Kings       5.29 
##  3 New York    4.37 
##  4 Richmond    3.82 
##  5 Nassau      3.55 
##  6 Erie        3.04 
##  7 Queens      2.11 
##  8 Bronx       1.63 
##  9 Westchester 1.35 
## 10 Monroe      0.668
## # … with 31 more rows

Summary

After reviewing the Hate Crimes Dataset, I think one positive aspect of the data was that various New York counties were represented. One negative aspect of the data set was that there were only three main crime types. I think it would be interesting to see which kinds of groups commit hate crimes (White, Hispanic, Women, Men, Heterosexual, etc.). I also think we could expand on the three main crime types and break them down as violent or non-violent. It would be interesting to take one kind of hate crime, for instance ‘crimes against persons: kidnapping, fondling, and sex offenses - nonforcible’ or ‘crimes against property: bribery, counterfeiting/forgery, embezzlement, extortion/blackmail, fraud offenses, larceny-theft offenses, and stolen property offenses’ and see how often this occurs in different counties. After seeing the results, I would follow up to see whether there is an increase or a decrease in hate crimes committed in individual counties and ask more questions.