Pull in lubridate and tidyverse packages
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
Check working directory
# Print the current working directory; the relative file name passed to
# read_csv() further down is resolved against this location.
getwd()
## [1] "C:/Users/Jennifer/Documents/MC Data Science/Data Science 110 Writing and Comm/R Data Files and Markdown files"
Pull in the Dataset for Hate Crimes
# Load the hate-crime CSV into a tibble, letting readr guess the column types.
hatecrimes <- readr::read_csv(file = "hateCrimes2010.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## County = col_character(),
## `Crime Type` = col_character()
## )
## See spec(...) for full column specifications.
View the first few rows of the data using the head() function
# Preview the first six rows (the default number for head()).
hatecrimes %>% head()
## # A tibble: 6 x 44
## County Year `Crime Type` `Anti-Male` `Anti-Female` `Anti-Transgend~
## <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Albany 2016 Crimes Agai~ 0 0 0
## 2 Albany 2016 Property Cr~ 0 0 0
## 3 Alleg~ 2016 Property Cr~ 0 0 0
## 4 Bronx 2016 Crimes Agai~ 0 0 4
## 5 Bronx 2016 Property Cr~ 0 0 0
## 6 Broome 2016 Crimes Agai~ 0 0 0
## # ... with 38 more variables: `Anti-Gender Identity Expression` <dbl>,
## # `Anti-Age*` <dbl>, `Anti-White` <dbl>, `Anti-Black` <dbl>,
## # `Anti-American Indian/Alaskan Native` <dbl>, `Anti-Asian` <dbl>,
## # `Anti-Native Hawaiian/Pacific Islander` <dbl>, `Anti-Multi-Racial
## # Groups` <dbl>, `Anti-Other Race` <dbl>, `Anti-Jewish` <dbl>,
## # `Anti-Catholic` <dbl>, `Anti-Protestant` <dbl>, `Anti-Islamic
## # (Muslim)` <dbl>, `Anti-Multi-Religious Groups` <dbl>,
## # `Anti-Atheism/Agnosticism` <dbl>, `Anti-Religious Practice
## # Generally` <dbl>, `Anti-Other Religion` <dbl>, `Anti-Buddhist` <dbl>,
## # `Anti-Eastern Orthodox (Greek, Russian, etc.)` <dbl>,
## # `Anti-Hindu` <dbl>, `Anti-Jehovahs Witness` <dbl>,
## # `Anti-Mormon` <dbl>, `Anti-Other Christian` <dbl>, `Anti-Sikh` <dbl>,
## # `Anti-Hispanic` <dbl>, `Anti-Arab` <dbl>, `Anti-Other
## # Ethnicity/National Origin` <dbl>, `Anti-Non-Hispanic*` <dbl>,
## # `Anti-Gay Male` <dbl>, `Anti-Gay Female` <dbl>, `Anti-Gay (Male and
## # Female)` <dbl>, `Anti-Heterosexual` <dbl>, `Anti-Bisexual` <dbl>,
## # `Anti-Physical Disability` <dbl>, `Anti-Mental Disability` <dbl>,
## # `Total Incidents` <dbl>, `Total Victims` <dbl>, `Total
## # Offenders` <dbl>
View the structure of the dataset
# Compact listing of every column: its name, type and first few values.
str(object = hatecrimes)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 423 obs. of 44 variables:
## $ County : chr "Albany" "Albany" "Allegany" "Bronx" ...
## $ Year : num 2016 2016 2016 2016 2016 ...
## $ Crime Type : chr "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
## $ Anti-Male : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Female : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Transgender : num 0 0 0 4 0 0 0 0 0 0 ...
## $ Anti-Gender Identity Expression : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Age* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-White : num 0 0 0 1 1 0 0 0 0 0 ...
## $ Anti-Black : num 1 2 1 0 0 1 0 1 0 2 ...
## $ Anti-American Indian/Alaskan Native : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Asian : num 0 0 0 0 0 1 0 0 0 0 ...
## $ Anti-Native Hawaiian/Pacific Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Multi-Racial Groups : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Other Race : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Jewish : num 0 0 0 0 1 0 1 0 0 0 ...
## $ Anti-Catholic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Protestant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Islamic (Muslim) : num 1 0 0 6 0 0 0 0 1 0 ...
## $ Anti-Multi-Religious Groups : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Anti-Atheism/Agnosticism : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Religious Practice Generally : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Other Religion : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Buddhist : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Eastern Orthodox (Greek, Russian, etc.): num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Hindu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Jehovahs Witness : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Mormon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Other Christian : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Sikh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Hispanic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Arab : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Other Ethnicity/National Origin : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Non-Hispanic* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Gay Male : num 1 0 0 8 0 1 0 0 0 0 ...
## $ Anti-Gay Female : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Anti-Gay (Male and Female) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Heterosexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Bisexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Physical Disability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Anti-Mental Disability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Total Incidents : num 3 3 1 20 2 3 1 1 1 2 ...
## $ Total Victims : num 4 3 1 20 2 3 1 1 1 2 ...
## $ Total Offenders : num 3 3 1 25 2 3 1 1 1 2 ...
## - attr(*, "spec")=
## .. cols(
## .. County = col_character(),
## .. Year = col_double(),
## .. `Crime Type` = col_character(),
## .. `Anti-Male` = col_double(),
## .. `Anti-Female` = col_double(),
## .. `Anti-Transgender` = col_double(),
## .. `Anti-Gender Identity Expression` = col_double(),
## .. `Anti-Age*` = col_double(),
## .. `Anti-White` = col_double(),
## .. `Anti-Black` = col_double(),
## .. `Anti-American Indian/Alaskan Native` = col_double(),
## .. `Anti-Asian` = col_double(),
## .. `Anti-Native Hawaiian/Pacific Islander` = col_double(),
## .. `Anti-Multi-Racial Groups` = col_double(),
## .. `Anti-Other Race` = col_double(),
## .. `Anti-Jewish` = col_double(),
## .. `Anti-Catholic` = col_double(),
## .. `Anti-Protestant` = col_double(),
## .. `Anti-Islamic (Muslim)` = col_double(),
## .. `Anti-Multi-Religious Groups` = col_double(),
## .. `Anti-Atheism/Agnosticism` = col_double(),
## .. `Anti-Religious Practice Generally` = col_double(),
## .. `Anti-Other Religion` = col_double(),
## .. `Anti-Buddhist` = col_double(),
## .. `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
## .. `Anti-Hindu` = col_double(),
## .. `Anti-Jehovahs Witness` = col_double(),
## .. `Anti-Mormon` = col_double(),
## .. `Anti-Other Christian` = col_double(),
## .. `Anti-Sikh` = col_double(),
## .. `Anti-Hispanic` = col_double(),
## .. `Anti-Arab` = col_double(),
## .. `Anti-Other Ethnicity/National Origin` = col_double(),
## .. `Anti-Non-Hispanic*` = col_double(),
## .. `Anti-Gay Male` = col_double(),
## .. `Anti-Gay Female` = col_double(),
## .. `Anti-Gay (Male and Female)` = col_double(),
## .. `Anti-Heterosexual` = col_double(),
## .. `Anti-Bisexual` = col_double(),
## .. `Anti-Physical Disability` = col_double(),
## .. `Anti-Mental Disability` = col_double(),
## .. `Total Incidents` = col_double(),
## .. `Total Victims` = col_double(),
## .. `Total Offenders` = col_double()
## .. )
Change the variable names to all lowercase
# Lowercase every column name; the select() call later in the script refers
# to the columns by their lowercase names.
names(hatecrimes) <- hatecrimes %>%
  names() %>%
  tolower()
Change the variable names to remove spaces using the gsub() function
# Strip every space from the column names. Only spaces are removed: hyphens,
# slashes, asterisks and parentheses stay in the names.
names(hatecrimes) <- gsub(
  pattern = " ",
  replacement = "",
  x = names(hatecrimes),
  fixed = TRUE
)
View the structure again
# Re-inspect the structure to confirm the column names were rewritten.
str(object = hatecrimes)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 423 obs. of 44 variables:
## $ county : chr "Albany" "Albany" "Allegany" "Bronx" ...
## $ year : num 2016 2016 2016 2016 2016 ...
## $ crimetype : chr "Crimes Against Persons" "Property Crimes" "Property Crimes" "Crimes Against Persons" ...
## $ anti-male : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-female : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-transgender : num 0 0 0 4 0 0 0 0 0 0 ...
## $ anti-genderidentityexpression : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-age* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-white : num 0 0 0 1 1 0 0 0 0 0 ...
## $ anti-black : num 1 2 1 0 0 1 0 1 0 2 ...
## $ anti-americanindian/alaskannative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-asian : num 0 0 0 0 0 1 0 0 0 0 ...
## $ anti-nativehawaiian/pacificislander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-multi-racialgroups : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherrace : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-jewish : num 0 0 0 0 1 0 1 0 0 0 ...
## $ anti-catholic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-protestant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-islamic(muslim) : num 1 0 0 6 0 0 0 0 1 0 ...
## $ anti-multi-religiousgroups : num 0 1 0 0 0 0 0 0 0 0 ...
## $ anti-atheism/agnosticism : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-religiouspracticegenerally : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherreligion : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-buddhist : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-easternorthodox(greek,russian,etc.): num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-hindu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-jehovahswitness : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-mormon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherchristian : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-sikh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-hispanic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-arab : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-otherethnicity/nationalorigin : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-non-hispanic* : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-gaymale : num 1 0 0 8 0 1 0 0 0 0 ...
## $ anti-gayfemale : num 0 0 0 1 0 0 0 0 0 0 ...
## $ anti-gay(maleandfemale) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-heterosexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-bisexual : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-physicaldisability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anti-mentaldisability : num 0 0 0 0 0 0 0 0 0 0 ...
## $ totalincidents : num 3 3 1 20 2 3 1 1 1 2 ...
## $ totalvictims : num 4 3 1 20 2 3 1 1 1 2 ...
## $ totaloffenders : num 3 3 1 25 2 3 1 1 1 2 ...
## - attr(*, "spec")=
## .. cols(
## .. County = col_character(),
## .. Year = col_double(),
## .. `Crime Type` = col_character(),
## .. `Anti-Male` = col_double(),
## .. `Anti-Female` = col_double(),
## .. `Anti-Transgender` = col_double(),
## .. `Anti-Gender Identity Expression` = col_double(),
## .. `Anti-Age*` = col_double(),
## .. `Anti-White` = col_double(),
## .. `Anti-Black` = col_double(),
## .. `Anti-American Indian/Alaskan Native` = col_double(),
## .. `Anti-Asian` = col_double(),
## .. `Anti-Native Hawaiian/Pacific Islander` = col_double(),
## .. `Anti-Multi-Racial Groups` = col_double(),
## .. `Anti-Other Race` = col_double(),
## .. `Anti-Jewish` = col_double(),
## .. `Anti-Catholic` = col_double(),
## .. `Anti-Protestant` = col_double(),
## .. `Anti-Islamic (Muslim)` = col_double(),
## .. `Anti-Multi-Religious Groups` = col_double(),
## .. `Anti-Atheism/Agnosticism` = col_double(),
## .. `Anti-Religious Practice Generally` = col_double(),
## .. `Anti-Other Religion` = col_double(),
## .. `Anti-Buddhist` = col_double(),
## .. `Anti-Eastern Orthodox (Greek, Russian, etc.)` = col_double(),
## .. `Anti-Hindu` = col_double(),
## .. `Anti-Jehovahs Witness` = col_double(),
## .. `Anti-Mormon` = col_double(),
## .. `Anti-Other Christian` = col_double(),
## .. `Anti-Sikh` = col_double(),
## .. `Anti-Hispanic` = col_double(),
## .. `Anti-Arab` = col_double(),
## .. `Anti-Other Ethnicity/National Origin` = col_double(),
## .. `Anti-Non-Hispanic*` = col_double(),
## .. `Anti-Gay Male` = col_double(),
## .. `Anti-Gay Female` = col_double(),
## .. `Anti-Gay (Male and Female)` = col_double(),
## .. `Anti-Heterosexual` = col_double(),
## .. `Anti-Bisexual` = col_double(),
## .. `Anti-Physical Disability` = col_double(),
## .. `Anti-Mental Disability` = col_double(),
## .. `Total Incidents` = col_double(),
## .. `Total Victims` = col_double(),
## .. `Total Offenders` = col_double()
## .. )
View the summary statistics
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns.
summary(object = hatecrimes)
## county year crimetype anti-male
## Length:423 Min. :2010 Length:423 Min. :0.000000
## Class :character 1st Qu.:2011 Class :character 1st Qu.:0.000000
## Mode :character Median :2013 Mode :character Median :0.000000
## Mean :2013 Mean :0.007092
## 3rd Qu.:2015 3rd Qu.:0.000000
## Max. :2016 Max. :1.000000
## anti-female anti-transgender anti-genderidentityexpression
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01655 Mean :0.04728 Mean :0.05674
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :5.00000 Max. :3.00000
## anti-age* anti-white anti-black
## Min. :0.00000 Min. : 0.0000 Min. : 0.000
## 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 0.000
## Median :0.00000 Median : 0.0000 Median : 1.000
## Mean :0.05201 Mean : 0.3357 Mean : 1.761
## 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.: 2.000
## Max. :9.00000 Max. :11.0000 Max. :18.000
## anti-americanindian/alaskannative anti-asian
## Min. :0.000000 Min. :0.0000
## 1st Qu.:0.000000 1st Qu.:0.0000
## Median :0.000000 Median :0.0000
## Mean :0.007092 Mean :0.1773
## 3rd Qu.:0.000000 3rd Qu.:0.0000
## Max. :1.000000 Max. :8.0000
## anti-nativehawaiian/pacificislander anti-multi-racialgroups
## Min. :0 Min. :0.00000
## 1st Qu.:0 1st Qu.:0.00000
## Median :0 Median :0.00000
## Mean :0 Mean :0.08511
## 3rd Qu.:0 3rd Qu.:0.00000
## Max. :0 Max. :3.00000
## anti-otherrace anti-jewish anti-catholic anti-protestant
## Min. :0 Min. : 0.000 Min. : 0.0000 Min. :0.00000
## 1st Qu.:0 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.00000
## Median :0 Median : 0.000 Median : 0.0000 Median :0.00000
## Mean :0 Mean : 3.981 Mean : 0.2695 Mean :0.02364
## 3rd Qu.:0 3rd Qu.: 3.000 3rd Qu.: 0.0000 3rd Qu.:0.00000
## Max. :0 Max. :82.000 Max. :12.0000 Max. :1.00000
## anti-islamic(muslim) anti-multi-religiousgroups anti-atheism/agnosticism
## Min. : 0.0000 Min. : 0.00000 Min. :0
## 1st Qu.: 0.0000 1st Qu.: 0.00000 1st Qu.:0
## Median : 0.0000 Median : 0.00000 Median :0
## Mean : 0.4704 Mean : 0.07565 Mean :0
## 3rd Qu.: 0.0000 3rd Qu.: 0.00000 3rd Qu.:0
## Max. :10.0000 Max. :10.00000 Max. :0
## anti-religiouspracticegenerally anti-otherreligion anti-buddhist
## Min. :0.000000 Min. :0.000 Min. :0
## 1st Qu.:0.000000 1st Qu.:0.000 1st Qu.:0
## Median :0.000000 Median :0.000 Median :0
## Mean :0.007092 Mean :0.104 Mean :0
## 3rd Qu.:0.000000 3rd Qu.:0.000 3rd Qu.:0
## Max. :2.000000 Max. :4.000 Max. :0
## anti-easternorthodox(greek,russian,etc.) anti-hindu
## Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.000000
## Mean :0.002364 Mean :0.002364
## 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.000000 Max. :1.000000
## anti-jehovahswitness anti-mormon anti-otherchristian anti-sikh
## Min. :0 Min. :0 Min. :0.00000 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0
## Median :0 Median :0 Median :0.00000 Median :0
## Mean :0 Mean :0 Mean :0.01655 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0
## Max. :0 Max. :0 Max. :3.00000 Max. :0
## anti-hispanic anti-arab anti-otherethnicity/nationalorigin
## Min. : 0.0000 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 0.0000 Median :0.00000 Median : 0.0000
## Mean : 0.3735 Mean :0.06619 Mean : 0.2837
## 3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :17.0000 Max. :2.00000 Max. :19.0000
## anti-non-hispanic* anti-gaymale anti-gayfemale
## Min. :0 Min. : 0.000 Min. :0.0000
## 1st Qu.:0 1st Qu.: 0.000 1st Qu.:0.0000
## Median :0 Median : 0.000 Median :0.0000
## Mean :0 Mean : 1.499 Mean :0.2411
## 3rd Qu.:0 3rd Qu.: 1.000 3rd Qu.:0.0000
## Max. :0 Max. :36.000 Max. :8.0000
## anti-gay(maleandfemale) anti-heterosexual anti-bisexual
## Min. :0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.0000 Median :0.000000 Median :0.000000
## Mean :0.1017 Mean :0.002364 Mean :0.004728
## 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :4.0000 Max. :1.000000 Max. :1.000000
## anti-physicaldisability anti-mentaldisability totalincidents
## Min. :0.00000 Min. :0.000000 Min. : 1.00
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.: 1.00
## Median :0.00000 Median :0.000000 Median : 3.00
## Mean :0.01182 Mean :0.009456 Mean : 10.09
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.: 10.00
## Max. :1.00000 Max. :1.000000 Max. :101.00
## totalvictims totaloffenders
## Min. : 1.00 Min. : 1.00
## 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 3.00 Median : 3.00
## Mean : 10.48 Mean : 11.77
## 3rd Qu.: 10.00 3rd Qu.: 11.00
## Max. :106.00 Max. :113.00
Note that there are no “NA” values found
Select only the hate crimes with higher incidence
# Keep the two identifying columns, a hand-picked set of bias categories and
# the three totals. Column names containing "-", "*" or "(" are not valid
# bare R names, so they are passed to select() as strings.
hatecrimes2 <- select(
  hatecrimes,
  county,
  year,
  "anti-black",
  "anti-white",
  "anti-jewish",
  "anti-catholic",
  "anti-age*",
  "anti-islamic(muslim)",
  "anti-gaymale",
  "anti-hispanic",
  totalincidents,
  totalvictims,
  totaloffenders
)
hatecrimes2 %>% head()
## # A tibble: 6 x 13
## county year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albany 2016 1 0 0 0
## 2 Albany 2016 2 0 0 0
## 3 Alleg~ 2016 1 0 0 0
## 4 Bronx 2016 0 1 0 0
## 5 Bronx 2016 0 1 1 0
## 6 Broome 2016 1 0 0 0
## # ... with 7 more variables: `anti-age*` <dbl>,
## # `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## # `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## # totaloffenders <dbl>
Check summary to look for missing values
# Summarise the reduced table; summary() would report an "NA's" row for any
# column that contains missing values.
summary(object = hatecrimes2)
## county year anti-black anti-white
## Length:423 Min. :2010 Min. : 0.000 Min. : 0.0000
## Class :character 1st Qu.:2011 1st Qu.: 0.000 1st Qu.: 0.0000
## Mode :character Median :2013 Median : 1.000 Median : 0.0000
## Mean :2013 Mean : 1.761 Mean : 0.3357
## 3rd Qu.:2015 3rd Qu.: 2.000 3rd Qu.: 0.0000
## Max. :2016 Max. :18.000 Max. :11.0000
## anti-jewish anti-catholic anti-age* anti-islamic(muslim)
## Min. : 0.000 Min. : 0.0000 Min. :0.00000 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median : 0.000 Median : 0.0000 Median :0.00000 Median : 0.0000
## Mean : 3.981 Mean : 0.2695 Mean :0.05201 Mean : 0.4704
## 3rd Qu.: 3.000 3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :82.000 Max. :12.0000 Max. :9.00000 Max. :10.0000
## anti-gaymale anti-hispanic totalincidents totalvictims
## Min. : 0.000 Min. : 0.0000 Min. : 1.00 Min. : 1.00
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 0.000 Median : 0.0000 Median : 3.00 Median : 3.00
## Mean : 1.499 Mean : 0.3735 Mean : 10.09 Mean : 10.48
## 3rd Qu.: 1.000 3rd Qu.: 0.0000 3rd Qu.: 10.00 3rd Qu.: 10.00
## Max. :36.000 Max. :17.0000 Max. :101.00 Max. :106.00
## totaloffenders
## Min. : 1.00
## 1st Qu.: 1.00
## Median : 3.00
## Mean : 11.77
## 3rd Qu.: 11.00
## Max. :113.00
Check the dimensions to count how many variables remain
# Row count followed by column count of the reduced table.
c(nrow(hatecrimes2), ncol(hatecrimes2))
## [1] 423 13
Reorder the crimes in descending order by number of incidents
# Sort rows from most to fewest incidents, breaking ties by offenders and
# then by victims. desc() accepts a single vector, so every sort key needs
# its own desc() call; passing all three columns to one desc() call does not
# sort by the second and third columns.
ordered <- hatecrimes2 %>%
  arrange(
    desc(totalincidents),
    desc(totaloffenders),
    desc(totalvictims)
  )
head(ordered)
## # A tibble: 6 x 13
## county year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Kings 2012 4 1 82 6
## 2 Suffo~ 2012 18 0 48 7
## 3 Kings 2010 10 3 34 0
## 4 New Y~ 2016 6 5 9 0
## 5 Kings 2015 6 3 35 0
## 6 Kings 2016 4 6 26 1
## # ... with 7 more variables: `anti-age*` <dbl>,
## # `anti-islamic(muslim)` <dbl>, `anti-gaymale` <dbl>,
## # `anti-hispanic` <dbl>, totalincidents <dbl>, totalvictims <dbl>,
## # totaloffenders <dbl>
Pull in package called reshape2
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.5.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
Convert the data from wide to long
# Reshape from wide to long: columns 3 to 13 (everything after county and
# year) are stacked into an "id" column holding the former column name and a
# "crimecount" column holding its value.
hatecrimeslong <- tidyr::gather(
  ordered,
  key = "id",
  value = "crimecount",
  3:13
)

# Scatter plot of crimecount against year, coloured by id, one panel per id.
hatecrimesplot <- ggplot(
  data = hatecrimeslong,
  mapping = aes(x = year, y = crimecount, color = id)
) +
  geom_point() +
  facet_wrap(~id)
hatecrimesplot

Filter further to hate crimes against Black people, gay men, and Jewish people
# Keep only the rows for the three categories of interest.
hatenew <- hatecrimeslong %>%
  filter(id %in% c("anti-black", "anti-jewish", "anti-gaymale"))
library(ggplot2)
Plot these 3 types of hate crimes together
# Dodged bar chart of crimecount by year with one fill colour per category.
# NOTE(review): hatenew appears to hold several rows per year and category
# (one per county); such rows would be drawn on top of each other at the same
# dodge position rather than added together. Confirm the bar heights mean
# what the y-axis label says.
plot2 <- ggplot(data = hatenew) +
  geom_col(
    mapping = aes(x = year, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot2

Make bar graphs by county instead of by year
# Same dodged bar chart as the by-year plot, but with county on the x-axis.
# The legend title is spelled "Hate Crime Type" (it previously read "TYpe").
# NOTE(review): hatenew appears to hold several rows per county and category
# (one per year); such rows would be drawn on top of each other rather than
# added together. Confirm the bar heights mean what the y-axis label says.
plot3 <- ggplot(data = hatenew) +
  geom_col(
    mapping = aes(x = county, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    title = "Hate Crime Type in NY Counties Between 2010-2016",
    y = "Number of Hate Crime Incidents",
    fill = "Hate Crime Type"
  )
plot3

Top 5 counties
# Total crimecount per county, sorted from largest to smallest, keeping the
# top five. top_n() is called without `wt`, so it ranks by the last column of
# the summarised table, which is `sum`.
counties <- hatenew %>%
  group_by(county) %>%
  summarise(sum = sum(crimecount)) %>%
  arrange(-sum) %>%
  top_n(n = 5)
## Selecting by sum
View the counties
# Show the five-row county ranking.
print(counties)
## # A tibble: 5 x 2
## county sum
## <chr> <dbl>
## 1 Kings 713
## 2 New York 459
## 3 Suffolk 360
## 4 Nassau 298
## 5 Queens 235
Make a bar plot for the 5 counties
# Dodged bar chart restricted to the five listed counties.
# labs() sets axis titles through aesthetic names, so the y-axis title must
# be given as `y`; a `ylab` entry is not an aesthetic and leaves the axis
# showing the raw column name.
plot4 <- hatenew %>%
  filter(
    county %in% c("Kings", "New York", "Suffolk", "Nassau", "Queens")
  ) %>%
  ggplot() +
  geom_col(
    mapping = aes(x = county, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    y = "Number of Hate Crime Incidents",
    title = "5 Counties in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )
plot4

HOMEWORK
Add a plot to reflect the crime type
# Jittered scatter of total victims per year, coloured by crime type. Uses
# the full table because crimetype is not among the columns kept in
# hatecrimes2.
extraplot3 <- ggplot(
  data = hatecrimes,
  mapping = aes(x = year, y = totalvictims)
) +
  geom_jitter(mapping = aes(color = crimetype))
extraplot3

# Jittered scatter of total victims per year, with point colour mapped to the
# number of offenders.
extraplot <- ggplot(
  data = hatecrimes2,
  mapping = aes(x = year, y = totalvictims)
) +
  geom_jitter(mapping = aes(color = totaloffenders))
extraplot

Create a plot for just Kings County
# Dodged bar chart of crimecount by year for Kings County only.
# labs() sets axis titles through aesthetic names, so the y-axis title is
# given as `y` (a `ylab` entry is ignored). The title names the county as it
# appears in the data ("Kings").
extraplot2 <- hatenew %>%
  filter(county == "Kings") %>%
  ggplot() +
  geom_col(
    mapping = aes(x = year, y = crimecount, fill = id),
    position = "dodge"
  ) +
  labs(
    y = "Number of Hate Crime Incidents",
    title = "Kings County in NY with Highest Incidents of Hate Crimes",
    subtitle = "Between 2010-2016",
    fill = "Hate Crime Type"
  )
extraplot2
