library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Check the current working directory
# Print the current working directory, i.e. where relative file paths resolve.
getwd()
## [1] "C:/Users/Don A/Documents/Don's files/MC"
Try again, this time naming the data folder explicitly
# Folder that holds the raw CSV. The full path is assembled with file.path()
# instead of calling setwd(): changing the working directory mutates global
# session state and silently changes how every later relative path resolves.
data_dir <- "C:/Users/Don A/Documents/Don's files/MC"

# Load the hate-crime table; read_csv() guesses the column types and prints
# the specification it settled on.
hatecrimes <- read_csv(file.path(data_dir, "hateCrimes2010.csv"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## County = col_character(),
## `Crime Type` = col_character()
## )
## See spec(...) for full column specifications.
Make all column headers lowercase and remove the spaces
# Normalise the column headers in one pass: lowercase them, then strip every
# space. Other punctuation ("-", "*", "/", parentheses) is left untouched, so
# most headers remain non-syntactic names that need backticks or quotes.
names(hatecrimes) <- gsub(" ", "", tolower(names(hatecrimes)), fixed = TRUE)

# Show the column names and types after renaming.
str(hatecrimes)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 423 obs. of 44 variables:
## $ county : chr "Albany" "Albany" "Allegany" "Bronx" ...
## $ year : num 2016 2016 2016 2016 2016 ...
## $ crimetype : chr "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
## $ anti-male : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-female : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-transgender : num 0 0 0 4 0 0 0 0 0 0 ...
## $ anti-genderidentityexpression : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-age* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-white : num 0 0 0 1 1 0 0 0 0 0 ...
## $ anti-black : num 1 2 1 0 0 1 0 1 0 2 ...
## $ anti-americanindian/alaskannative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-asian : num 0 0 0 0 0 1 0 0 0 0 ...
## $ anti-nativehawaiian/pacificislander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-multi-racialgroups : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherrace : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-jewish : num 0 0 0 0 1 0 1 0 0 0 ...
## $ anti-catholic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-protestant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-islamic(muslim) : num 1 0 0 6 0 0 0 0 1 0 ...
## $ anti-multi-religiousgroups : num 0 1 0 0 0 0 0 0 0 0 ...
## $ anti-atheism/agnosticism : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-religiouspracticegenerally : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherreligion : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-buddhist : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-easternorthodox(greek,russian,etc.): num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-hindu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-jehovahswitness : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-mormon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherchristian : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-sikh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-hispanic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-arab : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherethnicity/nationalorigin : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-non-hispanic* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-gaymale : num 1 0 0 8 0 1 0 0 0 0 ...
## $ anti-gayfemale : num 0 0 0 1 0 0 0 0 0 0 ...
## $ anti-gay(maleandfemale) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-heterosexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-bisexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-physicaldisability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-mentaldisability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ totalincidents : num 3 3 1 20 2 3 1 1 1 2 ...
## $ totalvictims : num 4 3 1 20 2 3 1 1 1 2 ...
## $ totaloffenders : num 3 3 1 25 2 3 1 1 1 2 ...
## - attr(*, "spec")=
## .. cols(
## .. County = col_character(),
## .. Year = col_double(),
## .. `Crime Type` = col_character(),
## .. `Anti-Male` = col_double(),
## .. `Anti-Female` = col_double(),
## .. `Anti-Transgender` = col_double(),
## .. `Anti-Gender Identity Expression` = col_double(),
## .. `Anti-Age*` = col_double(),
## .. `Anti-White` = col_double(),
## .. `Anti-Black` = col_double(),
## .. `Anti-American Indian/Alaskan Native` = col_double(),
## .. `Anti-Asian` = col_double(),
## .. `Anti-Native Hawaiian/Pacific Islander` = col_double(),
## .. `Anti-Multi-Racial Groups` = col_double(),
## .. `Anti-Other Race` = col_double(),
## .. `Anti-Jewish` = col_double(),
## .. `Anti-Catholic` = col_double(),
## .. `Anti-Protestant` = col_double(),
## .. `Anti-Islamic (Muslim)` = col_double(),
## .. `Anti-Multi-Religious Groups` = col_double(),
## .. `Anti-Atheism/Agnosticism` = col_double(),
## .. `Anti-Religious Practice Generally` = col_double(),
## .. `Anti-Other Religion` = col_double(),
## .. `Anti-Buddhist` = col_double(),
## .. `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
## .. `Anti-Hindu` = col_double(),
## .. `Anti-Jehovahs Witness` = col_double(),
## .. `Anti-Mormon` = col_double(),
## .. `Anti-Other Christian` = col_double(),
## .. `Anti-Sikh` = col_double(),
## .. `Anti-Hispanic` = col_double(),
## .. `Anti-Arab` = col_double(),
## .. `Anti-Other Ethnicity/National Origin` = col_double(),
## .. `Anti-Non-Hispanic*` = col_double(),
## .. `Anti-Gay Male` = col_double(),
## .. `Anti-Gay Female` = col_double(),
## .. `Anti-Gay (Male and Female)` = col_double(),
## .. `Anti-Heterosexual` = col_double(),
## .. `Anti-Bisexual` = col_double(),
## .. `Anti-Physical Disability` = col_double(),
## .. `Anti-Mental Disability` = col_double(),
## .. `Total Incidents` = col_double(),
## .. `Total Victims` = col_double(),
## .. `Total Offenders` = col_double()
## .. )
Select only certain hate-crime categories, along with the county, year, and total columns
# Narrow the table to the two identifier columns, eight bias categories, and
# the three total columns. Hyphenated headers are non-syntactic, so each one
# is wrapped in backticks.
hatecrimes2 <- select(
  hatecrimes,
  county, year,
  `anti-black`, `anti-white`, `anti-jewish`, `anti-catholic`,
  `anti-age*`, `anti-islamic(muslim)`, `anti-gaymale`, `anti-hispanic`,
  totalincidents, totalvictims, totaloffenders
)

# Preview the first rows of the subset.
head(hatecrimes2)
## # A tibble: 6 x 13
## county year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albany 2016 1 0 0 0
## 2 Albany 2016 2 0 0 0
## 3 Alleg~ 2016 1 0 0 0
## 4 Bronx 2016 0 1 0 0
## 5 Bronx 2016 0 1 1 0
## 6 Broome 2016 1 0 0 0
## # ... with 7 more variables: `anti-age*` <dbl>,
## # `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## # `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## # totaloffenders <dbl>
# Report the number of rows and columns in the subset.
dim(hatecrimes2)
## [1] 423 13
Check the summary to make sure there are no missing values
# Per-column summary; for a numeric column summary() adds an "NA's" entry
# whenever that column contains missing values.
summary(hatecrimes2)
## county year anti-black anti-white
## Length:423 Min. :2010 Min. : 0.000 Min. : 0.0000
## Class :character 1st Qu.:2011 1st Qu.: 0.000 1st Qu.: 0.0000
## Mode :character Median :2013 Median : 1.000 Median : 0.0000
## Mean :2013 Mean : 1.761 Mean : 0.3357
## 3rd Qu.:2015 3rd Qu.: 2.000 3rd Qu.: 0.0000
## Max. :2016 Max. :18.000 Max. :11.0000
## anti-jewish anti-catholic anti-age* anti-islamic(muslim)
## Min. : 0.000 Min. : 0.0000 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 0.000 Median : 0.0000 Median :0.00000 Median : 0.0000
## Mean : 3.981 Mean : 0.2695 Mean :0.05201 Mean : 0.4704
## 3rd Qu.: 3.000 3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :82.000 Max. :12.0000 Max. :9.00000 Max. :10.0000
## anti-gaymale anti-hispanic totalincidents totalvictims
## Min. : 0.000 Min. : 0.0000 Min. : 1.00 Min. : 1.00
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 0.000 Median : 0.0000 Median : 3.00 Median : 3.00
## Mean : 1.499 Mean : 0.3735 Mean : 10.09 Mean : 10.48
## 3rd Qu.: 1.000 3rd Qu.: 0.0000 3rd Qu.: 10.00 3rd Qu.: 10.00
## Max. :36.000 Max. :17.0000 Max. :101.00 Max. :106.00
## totaloffenders
## Min. : 1.00
## 1st Qu.: 1.00
## Median : 3.00
## Mean : 11.77
## 3rd Qu.: 11.00
## Max. :113.00
Sort the rows from the most to the fewest total incidents
# Sort from the highest to the lowest number of total incidents, breaking
# ties by total offenders and then by total victims. desc() takes a single
# vector, so every sort key gets its own desc() call; bundling all three
# columns into one desc() leaves the extra keys unused at best and is
# rejected outright by newer dplyr releases.
ordered <- hatecrimes2 %>%
  arrange(desc(totalincidents), desc(totaloffenders), desc(totalvictims))

# Preview the rows with the most incidents.
head(ordered)
## # A tibble: 6 x 13
## county year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Kings 2012 4 1 82 6
## 2 Suffo~ 2012 18 0 48 7
## 3 Kings 2010 10 3 34 0
## 4 New Y~ 2016 6 5 9 0
## 5 Kings 2015 6 3 35 0
## 6 Kings 2016 4 6 26 1
## # ... with 7 more variables: `anti-age*` <dbl>,
## # `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## # `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## # totaloffenders <dbl>
Reshape the data to long format so that facet_wrap can be used (reshape2 is loaded here)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Reshape to long format: every column except county and year is stacked into
# an (id, crimecount) pair. Columns are excluded by name rather than picked by
# position, so the reshape keeps working if the column order changes.
# pivot_longer() is the current tidyr interface for this; it emits the pairs
# row by row instead of column by column, which changes only the row order.
# The three total* columns are reshaped as well, so they get their own panels.
hatecrimeslong <- ordered %>%
  tidyr::pivot_longer(
    cols = -c(county, year),
    names_to = "id",
    values_to = "crimecount"
  )

# Count against year, with one panel and one colour per id.
hatecrimesplot <- ggplot(hatecrimeslong, aes(year, crimecount, color = id)) +
  geom_point() +
  facet_wrap(~id)
hatecrimesplot
Look deeper into hate crimes against Black people, gay men, and Jewish people
# Keep only the rows for the three bias categories examined below.
hatenew <- filter(
  hatecrimeslong,
  id %in% c("anti-black", "anti-jewish", "anti-gaymale")
)
Plot those three categories together, by year
# Dodged bars of crimecount per year, filled by bias category.
# NOTE(review): hatenew holds several rows for each year/id pair (one per
# county and crime type), so the dodged bars overplot and the visible height
# looks like the largest single row rather than a yearly total - confirm this
# is what the chart is meant to show.
plot2 <- ggplot(hatenew, aes(x = year, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot2
What about the counties?
# Same chart as the by-year version, but with county on the x-axis: dodged
# bars of crimecount, filled by bias category. Every county in hatenew gets
# its own group of bars.
plot3 <- ggplot(hatenew, aes(x = county, y = crimecount, fill = id)) +
  geom_col(position = "dodge") +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot3
So many counties - narrow it down to the five with the highest totals
# Add up crimecount per county across the three selected categories, sort
# from largest to smallest, and keep the five largest totals.
counties <- hatenew %>%
  group_by(county) %>%
  summarize(sum = sum(crimecount)) %>%
  arrange(desc(sum)) %>%
  # Name the ranking column explicitly: without `wt`, top_n() falls back to
  # the last column of the table and reports it with a "Selecting by sum"
  # message, so the result would silently depend on column order.
  top_n(n = 5, wt = sum)

# Print the five counties and their totals.
counties
## # A tibble: 5 x 2
## county sum
## <chr> <dbl>
## 1 Kings 713
## 2 New York 459
## 3 Suffolk 360
## 4 Nassau 298
## 5 Queens 235
Create a bar plot for those five counties
# Dodged bars of crimecount for the five counties with the highest totals,
# filled by bias category.
plot4 <- hatenew %>%
  filter(county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")) %>%
  ggplot() +
  geom_bar(
    aes(x = county, y = crimecount, fill = id),
    position = "dodge", stat = "identity"
  ) +
  # labs() looks labels up by aesthetic name, so the y-axis title must be
  # passed as `y`. An argument called `ylab` matches no aesthetic and is
  # ignored, which leaves the axis titled with the raw column name.
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )
plot4