library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Check the working directory

# Print the session's current working directory; the relative CSV path used
# when loading the data is resolved against it.
getwd()
## [1] "C:/Users/Don A/Documents/Don's files/MC"

Load the hate crimes data

# Load the hate-crime CSV into a tibble.
# The file is addressed through an explicit path rather than by calling
# setwd(), so reading the data does not change the session's working
# directory as a side effect.
# NOTE(review): the directory is still machine-specific; point `data_dir`
# at the project's data folder when running elsewhere.
data_dir <- "C:/Users/Don A/Documents/Don's files/MC"
hatecrimes <- read_csv(file.path(data_dir, "hateCrimes2010.csv"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   County = col_character(),
##   `Crime Type` = col_character()
## )
## See spec(...) for full column specifications.

Make all column headers lowercase and remove spaces

# Normalise the column headers in one step: lowercase every name, then
# delete all space characters from it.
names(hatecrimes) <- gsub(" ", "", tolower(names(hatecrimes)))

# Show the resulting structure (column names and types).
str(hatecrimes)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 423 obs. of  44 variables:
##  $ county                                  : chr  "Albany" "Albany" "Allegany" "Bronx" ...
##  $ year                                    : num  2016 2016 2016 2016 2016 ...
##  $ crimetype                               : chr  "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
##  $ anti-male                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-female                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-transgender                        : num  0 0 0 4 0 0 0 0 0 0 ...
##  $ anti-genderidentityexpression           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-age*                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-white                              : num  0 0 0 1 1 0 0 0 0 0 ...
##  $ anti-black                              : num  1 2 1 0 0 1 0 1 0 2 ...
##  $ anti-americanindian/alaskannative       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-asian                              : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ anti-nativehawaiian/pacificislander     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-multi-racialgroups                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherrace                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-jewish                             : num  0 0 0 0 1 0 1 0 0 0 ...
##  $ anti-catholic                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-protestant                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-islamic(muslim)                    : num  1 0 0 6 0 0 0 0 1 0 ...
##  $ anti-multi-religiousgroups              : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anti-atheism/agnosticism                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-religiouspracticegenerally         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherreligion                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-buddhist                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-easternorthodox(greek,russian,etc.): num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-hindu                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-jehovahswitness                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-mormon                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherchristian                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-sikh                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-hispanic                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-arab                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-otherethnicity/nationalorigin      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-non-hispanic*                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-gaymale                            : num  1 0 0 8 0 1 0 0 0 0 ...
##  $ anti-gayfemale                          : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ anti-gay(maleandfemale)                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-heterosexual                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-bisexual                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-physicaldisability                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anti-mentaldisability                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ totalincidents                          : num  3 3 1 20 2 3 1 1 1 2 ...
##  $ totalvictims                            : num  4 3 1 20 2 3 1 1 1 2 ...
##  $ totaloffenders                          : num  3 3 1 25 2 3 1 1 1 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   County = col_character(),
##   ..   Year = col_double(),
##   ..   `Crime Type` = col_character(),
##   ..   `Anti-Male` = col_double(),
##   ..   `Anti-Female` = col_double(),
##   ..   `Anti-Transgender` = col_double(),
##   ..   `Anti-Gender Identity Expression` = col_double(),
##   ..   `Anti-Age*` = col_double(),
##   ..   `Anti-White` = col_double(),
##   ..   `Anti-Black` = col_double(),
##   ..   `Anti-American Indian/Alaskan Native` = col_double(),
##   ..   `Anti-Asian` = col_double(),
##   ..   `Anti-Native Hawaiian/Pacific Islander` = col_double(),
##   ..   `Anti-Multi-Racial Groups` = col_double(),
##   ..   `Anti-Other Race` = col_double(),
##   ..   `Anti-Jewish` = col_double(),
##   ..   `Anti-Catholic` = col_double(),
##   ..   `Anti-Protestant` = col_double(),
##   ..   `Anti-Islamic (Muslim)` = col_double(),
##   ..   `Anti-Multi-Religious Groups` = col_double(),
##   ..   `Anti-Atheism/Agnosticism` = col_double(),
##   ..   `Anti-Religious Practice Generally` = col_double(),
##   ..   `Anti-Other Religion` = col_double(),
##   ..   `Anti-Buddhist` = col_double(),
##   ..   `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
##   ..   `Anti-Hindu` = col_double(),
##   ..   `Anti-Jehovahs Witness` = col_double(),
##   ..   `Anti-Mormon` = col_double(),
##   ..   `Anti-Other Christian` = col_double(),
##   ..   `Anti-Sikh` = col_double(),
##   ..   `Anti-Hispanic` = col_double(),
##   ..   `Anti-Arab` = col_double(),
##   ..   `Anti-Other Ethnicity/National Origin` = col_double(),
##   ..   `Anti-Non-Hispanic*` = col_double(),
##   ..   `Anti-Gay Male` = col_double(),
##   ..   `Anti-Gay Female` = col_double(),
##   ..   `Anti-Gay (Male and Female)` = col_double(),
##   ..   `Anti-Heterosexual` = col_double(),
##   ..   `Anti-Bisexual` = col_double(),
##   ..   `Anti-Physical Disability` = col_double(),
##   ..   `Anti-Mental Disability` = col_double(),
##   ..   `Total Incidents` = col_double(),
##   ..   `Total Victims` = col_double(),
##   ..   `Total Offenders` = col_double()
##   .. )

Select only certain hate-crime columns

# Keep the identifying columns, eight bias-type counts, and the three totals.
# Names containing "-", "*" or parentheses are not syntactic, so they are
# quoted with backticks.
hatecrimes2 <- select(
  hatecrimes,
  county,
  year,
  `anti-black`,
  `anti-white`,
  `anti-jewish`,
  `anti-catholic`,
  `anti-age*`,
  `anti-islamic(muslim)`,
  `anti-gaymale`,
  `anti-hispanic`,
  totalincidents,
  totalvictims,
  totaloffenders
)
head(hatecrimes2)
## # A tibble: 6 x 13
##   county  year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
##   <chr>  <dbl>        <dbl>        <dbl>         <dbl>           <dbl>
## 1 Albany  2016            1            0             0               0
## 2 Albany  2016            2            0             0               0
## 3 Alleg~  2016            1            0             0               0
## 4 Bronx   2016            0            1             0               0
## 5 Bronx   2016            0            1             1               0
## 6 Broome  2016            1            0             0               0
## # ... with 7 more variables: `anti-age*` <dbl>,
## #   `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## #   `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## #   totaloffenders <dbl>
# Report the number of rows and columns kept by the selection.
dim(hatecrimes2)
## [1] 423  13

check summary to ensure no missing values

# Per-column summary statistics; summary() adds an NA count to any numeric
# column that contains missing values, so none appearing means none missing.
summary(hatecrimes2)
##     county               year        anti-black       anti-white     
##  Length:423         Min.   :2010   Min.   : 0.000   Min.   : 0.0000  
##  Class :character   1st Qu.:2011   1st Qu.: 0.000   1st Qu.: 0.0000  
##  Mode  :character   Median :2013   Median : 1.000   Median : 0.0000  
##                     Mean   :2013   Mean   : 1.761   Mean   : 0.3357  
##                     3rd Qu.:2015   3rd Qu.: 2.000   3rd Qu.: 0.0000  
##                     Max.   :2016   Max.   :18.000   Max.   :11.0000  
##   anti-jewish     anti-catholic       anti-age*       anti-islamic(muslim)
##  Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   : 0.0000     
##  1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 0.0000     
##  Median : 0.000   Median : 0.0000   Median :0.00000   Median : 0.0000     
##  Mean   : 3.981   Mean   : 0.2695   Mean   :0.05201   Mean   : 0.4704     
##  3rd Qu.: 3.000   3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 0.0000     
##  Max.   :82.000   Max.   :12.0000   Max.   :9.00000   Max.   :10.0000     
##   anti-gaymale    anti-hispanic     totalincidents    totalvictims   
##  Min.   : 0.000   Min.   : 0.0000   Min.   :  1.00   Min.   :  1.00  
##  1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:  1.00   1st Qu.:  1.00  
##  Median : 0.000   Median : 0.0000   Median :  3.00   Median :  3.00  
##  Mean   : 1.499   Mean   : 0.3735   Mean   : 10.09   Mean   : 10.48  
##  3rd Qu.: 1.000   3rd Qu.: 0.0000   3rd Qu.: 10.00   3rd Qu.: 10.00  
##  Max.   :36.000   Max.   :17.0000   Max.   :101.00   Max.   :106.00  
##  totaloffenders  
##  Min.   :  1.00  
##  1st Qu.:  1.00  
##  Median :  3.00  
##  Mean   : 11.77  
##  3rd Qu.: 11.00  
##  Max.   :113.00

Order rows by total incidents in descending order

# Sort the rows from most to fewest incidents, breaking ties by offenders
# and then by victims, each in descending order.
# desc() takes a single vector, so every sort key gets its own desc() call
# instead of all three columns being passed to one call.
ordered <- hatecrimes2 %>%
  arrange(
    desc(totalincidents),
    desc(totaloffenders),
    desc(totalvictims)
  )
head(ordered)
## # A tibble: 6 x 13
##   county  year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
##   <chr>  <dbl>        <dbl>        <dbl>         <dbl>           <dbl>
## 1 Kings   2012            4            1            82               6
## 2 Suffo~  2012           18            0            48               7
## 3 Kings   2010           10            3            34               0
## 4 New Y~  2016            6            5             9               0
## 5 Kings   2015            6            3            35               0
## 6 Kings   2016            4            6            26               1
## # ... with 7 more variables: `anti-age*` <dbl>,
## #   `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## #   `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## #   totaloffenders <dbl>

Reshape to long format and plot with facet_wrap (reshape2 is loaded here, but the reshaping itself uses tidyr::gather)

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Reshape wide -> long: columns 3 through 13 (the bias-type counts and the
# three totals) are stacked into a key column `id` and a value column
# `crimecount`; county and year are repeated for each stacked row.
hatecrimeslong <- tidyr::gather(
  ordered,
  key = "id",
  value = "crimecount",
  3:13
)

# Scatter of count against year, coloured by and split into one panel per id.
hatecrimesplot <- ggplot(
  hatecrimeslong,
  aes(x = year, y = crimecount, color = id)
) +
  geom_point() +
  facet_wrap(~id)
hatecrimesplot

Look deeper into anti-Black, anti-gay-male, and anti-Jewish hate crimes

# Keep only the long-format rows for the three bias types of interest.
hatenew <- filter(
  hatecrimeslong,
  id %in% c("anti-black", "anti-jewish", "anti-gaymale")
)

plot those three together

# Dodged bars of crime count by year, one fill colour per bias type.
# geom_col() draws bar heights straight from `crimecount`.
plot2 <- ggplot(hatenew, aes(x = year, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot2

What about the counties?

# Same dodged-bar view as the by-year plot, but with county on the x-axis.
plot3 <- ggplot(hatenew, aes(x = county, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot3

There are too many counties to read on one axis, so find the five with the most incidents

# Total the three selected bias types per county, sort the totals from
# largest to smallest, and keep the five largest. top_n() is given no
# weighting column, so it ranks by the last column (`sum`) and says so.
counties <- hatenew %>%
  group_by(county) %>%
  summarise(sum = sum(crimecount)) %>%
  arrange(desc(sum)) %>%
  top_n(5)
## Selecting by sum
counties
## # A tibble: 5 x 2
##   county     sum
##   <chr>    <dbl>
## 1 Kings      713
## 2 New York   459
## 3 Suffolk    360
## 4 Nassau     298
## 5 Queens     235

Create a bar plot for those five counties

# Dodged bar plot restricted to the five counties with the largest combined
# counts (Kings, New York, Suffolk, Nassau, Queens), one fill colour per
# bias type.
plot4 <- hatenew %>%
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) %>%
  ggplot() +
  geom_bar(
    aes(x = county, y = crimecount, fill = id),
    position = "dodge",
    stat = "identity"
  ) +
  # labs() is keyed by aesthetic name, so the y-axis title must be passed as
  # `y`; a `ylab =` key matches no aesthetic and leaves the axis labelled
  # with the raw column name.
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )
plot4