── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
setwd ("/Users/hunchoamaru/Desktop/data 110" )
hatecrimes <- read_csv ("hateCrimes2010.csv" )
Rows: 423 Columns: 44
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): County, Crime Type
dbl (42): Year, Anti-Male, Anti-Female, Anti-Transgender, Anti-Gender Identi...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Make all headers lowercase and remove spaces
names (hatecrimes) <- tolower (names (hatecrimes))
names (hatecrimes) <- gsub (" " ,"" ,names (hatecrimes))
head (hatecrimes)
# A tibble: 6 × 44
county year crimetype `anti-male` `anti-female` `anti-transgender`
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 Albany 2016 Crimes Against Pe… 0 0 0
2 Albany 2016 Property Crimes 0 0 0
3 Allegany 2016 Property Crimes 0 0 0
4 Bronx 2016 Crimes Against Pe… 0 0 4
5 Bronx 2016 Property Crimes 0 0 0
6 Broome 2016 Crimes Against Pe… 0 0 0
# ℹ 38 more variables: `anti-genderidentityexpression` <dbl>,
# `anti-age*` <dbl>, `anti-white` <dbl>, `anti-black` <dbl>,
# `anti-americanindian/alaskannative` <dbl>, `anti-asian` <dbl>,
# `anti-nativehawaiian/pacificislander` <dbl>,
# `anti-multi-racialgroups` <dbl>, `anti-otherrace` <dbl>,
# `anti-jewish` <dbl>, `anti-catholic` <dbl>, `anti-protestant` <dbl>,
# `anti-islamic(muslim)` <dbl>, `anti-multi-religiousgroups` <dbl>, …
Select only certain hate-crimes
hatecrimes2 <- hatecrimes |>
select (county, year, 'anti-black' , 'anti-white' , 'anti-jewish' , 'anti-catholic' ,'anti-age*' ,'anti-islamic(muslim)' , 'anti-multi-religiousgroups' , 'anti-gaymale' , 'anti-hispanic' , 'anti-otherethnicity/nationalorigin' ) |>
group_by (county, year)
head (hatecrimes2)
# A tibble: 6 × 12
# Groups: county, year [4]
county year `anti-black` `anti-white` `anti-jewish` `anti-catholic`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Albany 2016 1 0 0 0
2 Albany 2016 2 0 0 0
3 Allegany 2016 1 0 0 0
4 Bronx 2016 0 1 0 0
5 Bronx 2016 0 1 1 0
6 Broome 2016 1 0 0 0
# ℹ 6 more variables: `anti-age*` <dbl>, `anti-islamic(muslim)` <dbl>,
# `anti-multi-religiousgroups` <dbl>, `anti-gaymale` <dbl>,
# `anti-hispanic` <dbl>, `anti-otherethnicity/nationalorigin` <dbl>
##Check the dimensions and the summary to make sure no missing values
# There are currently 12 variables with 423 rows.
summary (hatecrimes2)
county year anti-black anti-white
Length:423 Min. :2010 Min. : 0.000 Min. : 0.0000
Class :character 1st Qu.:2011 1st Qu.: 0.000 1st Qu.: 0.0000
Mode :character Median :2013 Median : 1.000 Median : 0.0000
Mean :2013 Mean : 1.761 Mean : 0.3357
3rd Qu.:2015 3rd Qu.: 2.000 3rd Qu.: 0.0000
Max. :2016 Max. :18.000 Max. :11.0000
anti-jewish anti-catholic anti-age* anti-islamic(muslim)
Min. : 0.000 Min. : 0.0000 Min. :0.00000 Min. : 0.0000
1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.: 0.0000
Median : 0.000 Median : 0.0000 Median :0.00000 Median : 0.0000
Mean : 3.981 Mean : 0.2695 Mean :0.05201 Mean : 0.4704
3rd Qu.: 3.000 3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.: 0.0000
Max. :82.000 Max. :12.0000 Max. :9.00000 Max. :10.0000
anti-multi-religiousgroups anti-gaymale anti-hispanic
Min. : 0.00000 Min. : 0.000 Min. : 0.0000
1st Qu.: 0.00000 1st Qu.: 0.000 1st Qu.: 0.0000
Median : 0.00000 Median : 0.000 Median : 0.0000
Mean : 0.07565 Mean : 1.499 Mean : 0.3735
3rd Qu.: 0.00000 3rd Qu.: 1.000 3rd Qu.: 0.0000
Max. :10.00000 Max. :36.000 Max. :17.0000
anti-otherethnicity/nationalorigin
Min. : 0.0000
1st Qu.: 0.0000
Median : 0.0000
Mean : 0.2837
3rd Qu.: 0.0000
Max. :19.0000
Look deeper into crimes against blacks, gay males, and jews
hatenew <- hatelong |>
filter ( victim_cat %in% c ("anti-black" , "anti-jewish" , "anti-gaymale" ))|>
group_by (year, county) |>
arrange (desc (crimecount))
hatenew
# A tibble: 1,269 × 4
# Groups: year, county [277]
county year victim_cat crimecount
<chr> <dbl> <chr> <dbl>
1 Kings 2012 anti-jewish 82
2 Kings 2016 anti-jewish 51
3 Suffolk 2014 anti-jewish 48
4 Suffolk 2012 anti-jewish 48
5 Kings 2011 anti-jewish 44
6 Kings 2013 anti-jewish 41
7 Kings 2010 anti-jewish 39
8 Nassau 2011 anti-jewish 38
9 Suffolk 2013 anti-jewish 37
10 Nassau 2016 anti-jewish 36
# ℹ 1,259 more rows
Plot these three types of hate crimes together
plot2 <- hatenew |>
ggplot () +
geom_bar (aes (x= year, y= crimecount, fill = victim_cat),
position = "dodge" , stat = "identity" ) +
labs (fill = "Hate Crime Type" ,
y = "Number of Hate Crime Incidents" ,
title = "Hate Crime Type in NY Counties Between 2010-2016" ,
caption = "Source: NY State Division of Criminal Justice Services" )
plot2
What about the counties?
plot3 <- hatenew |>
ggplot () +
geom_bar (aes (x= county, y= crimecount, fill = victim_cat),
position = "dodge" , stat = "identity" ) +
labs (fill = "Hate Crime Type" ,
y = "Number of Hate Crime Incidents" ,
title = "Hate Crime Type in NY Counties Between 2010-2016" ,
caption = "Source: NY State Division of Criminal Justice Services" )
plot3
So many counties
counties <- hatenew |>
group_by (year, county)|>
summarize (sum = sum (crimecount)) |>
arrange (desc (sum))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# A tibble: 277 × 3
# Groups: year [7]
year county sum
<dbl> <chr> <dbl>
1 2012 Kings 136
2 2010 Kings 110
3 2016 Kings 101
4 2013 Kings 96
5 2014 Kings 94
6 2015 Kings 90
7 2011 Kings 86
8 2016 New York 86
9 2012 Suffolk 83
10 2013 New York 75
# ℹ 267 more rows
Top 5
counties2 <- hatenew |>
group_by (county)|>
summarize (sum = sum (crimecount)) |>
slice_max (order_by = sum, n= 5 )
counties2
# A tibble: 5 × 2
county sum
<chr> <dbl>
1 Kings 713
2 New York 459
3 Suffolk 360
4 Nassau 298
5 Queens 235
plot4 <- hatenew |>
filter (county %in% c ("Kings" , "New York" , "Suffolk" , "Nassau" , "Queens" )) |>
ggplot () +
geom_bar (aes (x= county, y= crimecount, fill = victim_cat),
position = "dodge" , stat = "identity" ) +
labs (y = "Number of Hate Crime Incidents" ,
title = "5 Counties in NY with Highest Incidents of Hate Crimes" ,
subtitle = "Between 2010-2016" ,
fill = "Hate Crime Type" ,
caption = "Source: NY State Division of Criminal Justice Services" )
plot4
How would calculations be affected by looking at hate crimes in counties per year by population densities?
setwd ("/Users/hunchoamaru/Desktop/data 110" )
nypop <- read_csv ("newyorkpopulation.csv" )
Rows: 62 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Geography
dbl (7): 2010, 2011, 2012, 2013, 2014, 2015, 2016
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Clean the county name to match the other dataset
nypop$ Geography <- gsub (" , New York" , "" , nypop$ Geography)
nypop$ Geography <- gsub ("County" , "" , nypop$ Geography)
nypoplong <- nypop |>
rename (county = Geography) |>
gather ("year" , "population" , 2 : 8 )
nypoplong$ year <- as.double (nypoplong$ year)
head (nypoplong)
# A tibble: 6 × 3
county year population
<chr> <dbl> <dbl>
1 Albany , New York 2010 304078
2 Allegany , New York 2010 48949
3 Bronx , New York 2010 1388240
4 Broome , New York 2010 200469
5 Cattaraugus , New York 2010 80249
6 Cayuga , New York 2010 79844
Focus on 2012
nypoplong12 <- nypoplong |>
filter (year == 2012 ) |>
arrange (desc (population)) |>
head (10 )
nypoplong12$ county<- gsub (" , New York" ,"" ,nypoplong12$ county)
nypoplong12
# A tibble: 10 × 3
county year population
<chr> <dbl> <dbl>
1 Kings 2012 2572282
2 Queens 2012 2278024
3 New York 2012 1625121
4 Suffolk 2012 1499382
5 Bronx 2012 1414774
6 Nassau 2012 1350748
7 Westchester 2012 961073
8 Erie 2012 920792
9 Monroe 2012 748947
10 Richmond 2012 470978
Filter hate crimes just for 2012 as well
counties12 <- counties |>
filter (year == 2012 ) |>
arrange (desc (sum))
counties12
# A tibble: 41 × 3
# Groups: year [1]
year county sum
<dbl> <chr> <dbl>
1 2012 Kings 136
2 2012 Suffolk 83
3 2012 New York 71
4 2012 Nassau 48
5 2012 Queens 48
6 2012 Erie 28
7 2012 Bronx 23
8 2012 Richmond 18
9 2012 Multiple 14
10 2012 Westchester 13
# ℹ 31 more rows
Join the Hate Crimes data with NY population data for 2012
datajoin <- counties12 |>
full_join (nypoplong12, by= c ("county" , "year" ))
datajoin
# A tibble: 41 × 4
# Groups: year [1]
year county sum population
<dbl> <chr> <dbl> <dbl>
1 2012 Kings 136 2572282
2 2012 Suffolk 83 1499382
3 2012 New York 71 1625121
4 2012 Nassau 48 1350748
5 2012 Queens 48 2278024
6 2012 Erie 28 920792
7 2012 Bronx 23 1414774
8 2012 Richmond 18 470978
9 2012 Multiple 14 NA
10 2012 Westchester 13 961073
# ℹ 31 more rows
Calculate the rate of incidents per 100,000. Then arrange in descending order
datajoinrate <- datajoin |>
mutate (rate = sum/ population* 100000 ) |>
arrange (desc (rate))
datajoinrate
# A tibble: 41 × 5
# Groups: year [1]
year county sum population rate
<dbl> <chr> <dbl> <dbl> <dbl>
1 2012 Suffolk 83 1499382 5.54
2 2012 Kings 136 2572282 5.29
3 2012 New York 71 1625121 4.37
4 2012 Richmond 18 470978 3.82
5 2012 Nassau 48 1350748 3.55
6 2012 Erie 28 920792 3.04
7 2012 Queens 48 2278024 2.11
8 2012 Bronx 23 1414774 1.63
9 2012 Westchester 13 961073 1.35
10 2012 Monroe 5 748947 0.668
# ℹ 31 more rows
dt <- datajoinrate[,c ("county" ,"rate" )]
dt
# A tibble: 41 × 2
county rate
<chr> <dbl>
1 Suffolk 5.54
2 Kings 5.29
3 New York 4.37
4 Richmond 3.82
5 Nassau 3.55
6 Erie 3.04
7 Queens 2.11
8 Bronx 1.63
9 Westchester 1.35
10 Monroe 0.668
# ℹ 31 more rows
Aggregating some of the categories
aggregategroups <- hatecrimes |>
pivot_longer (
cols = 4 : 44 ,
names_to = "victim_cat" ,
values_to = "crimecount"
)
unique (aggregategroups$ victim_cat)
[1] "anti-male"
[2] "anti-female"
[3] "anti-transgender"
[4] "anti-genderidentityexpression"
[5] "anti-age*"
[6] "anti-white"
[7] "anti-black"
[8] "anti-americanindian/alaskannative"
[9] "anti-asian"
[10] "anti-nativehawaiian/pacificislander"
[11] "anti-multi-racialgroups"
[12] "anti-otherrace"
[13] "anti-jewish"
[14] "anti-catholic"
[15] "anti-protestant"
[16] "anti-islamic(muslim)"
[17] "anti-multi-religiousgroups"
[18] "anti-atheism/agnosticism"
[19] "anti-religiouspracticegenerally"
[20] "anti-otherreligion"
[21] "anti-buddhist"
[22] "anti-easternorthodox(greek,russian,etc.)"
[23] "anti-hindu"
[24] "anti-jehovahswitness"
[25] "anti-mormon"
[26] "anti-otherchristian"
[27] "anti-sikh"
[28] "anti-hispanic"
[29] "anti-arab"
[30] "anti-otherethnicity/nationalorigin"
[31] "anti-non-hispanic*"
[32] "anti-gaymale"
[33] "anti-gayfemale"
[34] "anti-gay(maleandfemale)"
[35] "anti-heterosexual"
[36] "anti-bisexual"
[37] "anti-physicaldisability"
[38] "anti-mentaldisability"
[39] "totalincidents"
[40] "totalvictims"
[41] "totaloffenders"
aggregategroups <- aggregategroups |>
mutate (group = case_when (
victim_cat %in% c ("anti-transgender" , "anti-gayfemale" , "anti-gendervictim_catendityexpression" , "anti-gaymale" , "anti-gay(maleandfemale" , "anti-bisexual" ) ~ "anti-lgbtq" ,
victim_cat %in% c ("anti-multi-racialgroups" , "anti-jewish" , "anti-protestant" , "anti-multi-religousgroups" , "anti-religiouspracticegenerally" , "anti-buddhist" , "anti-hindu" , "anti-mormon" , "anti-sikh" , "anti-catholic" , "anti-islamic(muslim)" , "anti-atheism/agnosticism" , "anti-otherreligion" , "anti-easternorthodox(greek,russian,etc.)" , "anti-jehovahswitness" , "anti-otherchristian" ) ~ "anti-religion" ,
victim_cat %in% c ("anti-asian" , "anti-arab" , "anti-non-hispanic" , "anti-white" , "anti-americanindian/alaskannative" , "anti-nativehawaiian/pacificislander" , "anti-otherrace" , "anti-hispanic" , "anti-otherethnicity/nationalorigin" ) ~ "anti-ethnicity" ,
victim_cat %in% c ("anti-physicaldisability" , "anti-mentaldisability" ) ~ "anti-disability" ,
victim_cat %in% c ("anti-female" , "anti-male" ) ~ "anti-gender" ,
TRUE ~ "others" ))
aggregategroups
# A tibble: 17,343 × 6
county year crimetype victim_cat crimecount group
<chr> <dbl> <chr> <chr> <dbl> <chr>
1 Albany 2016 Crimes Against Persons anti-male 0 anti…
2 Albany 2016 Crimes Against Persons anti-female 0 anti…
3 Albany 2016 Crimes Against Persons anti-transgender 0 anti…
4 Albany 2016 Crimes Against Persons anti-genderidentityexpr… 0 othe…
5 Albany 2016 Crimes Against Persons anti-age* 0 othe…
6 Albany 2016 Crimes Against Persons anti-white 0 anti…
7 Albany 2016 Crimes Against Persons anti-black 1 othe…
8 Albany 2016 Crimes Against Persons anti-americanindian/ala… 0 anti…
9 Albany 2016 Crimes Against Persons anti-asian 0 anti…
10 Albany 2016 Crimes Against Persons anti-nativehawaiian/pac… 0 anti…
# ℹ 17,333 more rows
or create subset with just lgbtq
lgbtq <- hatecrimes |>
pivot_longer (
cols = 4 : 44 ,
names_to = "victim_cat" ,
values_to = "crimecount" ) |>
filter (victim_cat %in% c ("anti-transgender" , "anti-gayfemale" , "anti-gendervictim_catendityexpression" , "anti-gaymale" , "anti-gay(maleandfemale" , "anti-bisexual" ))
lgbtq
# A tibble: 1,692 × 5
county year crimetype victim_cat crimecount
<chr> <dbl> <chr> <chr> <dbl>
1 Albany 2016 Crimes Against Persons anti-transgender 0
2 Albany 2016 Crimes Against Persons anti-gaymale 1
3 Albany 2016 Crimes Against Persons anti-gayfemale 0
4 Albany 2016 Crimes Against Persons anti-bisexual 0
5 Albany 2016 Property Crimes anti-transgender 0
6 Albany 2016 Property Crimes anti-gaymale 0
7 Albany 2016 Property Crimes anti-gayfemale 0
8 Albany 2016 Property Crimes anti-bisexual 0
9 Allegany 2016 Property Crimes anti-transgender 0
10 Allegany 2016 Property Crimes anti-gaymale 0
# ℹ 1,682 more rows