library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
Data Preparation
# Load the FiveThirtyEight hate-crimes table directly from GitHub.
initial <- read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/hate-crimes/hate_crimes.csv"
)
## Parsed with column specification:
## cols(
## state = col_character(),
## median_household_income = col_double(),
## share_unemployed_seasonal = col_double(),
## share_population_in_metro_areas = col_double(),
## share_population_with_high_school_degree = col_double(),
## share_non_citizen = col_double(),
## share_white_poverty = col_double(),
## gini_index = col_double(),
## share_non_white = col_double(),
## share_voters_voted_trump = col_double(),
## hate_crimes_per_100k_splc = col_double(),
## avg_hatecrimes_per_100k_fbi = col_double()
## )
Create a data frame and add a column ElectTrump, where 1 means the state voted for Trump (vote share of at least 0.5) and 0 means it did not.
# Build the analysis data frame:
#   * keep only the columns used in this report,
#   * drop states with a missing or zero SPLC hate-crime rate (filter() also
#     discards rows where the comparison evaluates to NA),
#   * drop the District of Columbia,
#   * add ElectTrump: 1 when the Trump vote share is at least 0.5, 0 otherwise.
# The boundary case (a share of exactly 0.5) is handled by the condition itself
# rather than by patching a hard-coded row index afterwards, so the coding stays
# correct if the filtering or row order ever changes.
# NOTE(review): a 0.5 threshold codes states with a share just under 0.5 as 0,
# even if the candidate carried them by plurality -- confirm this is intended.
hate_crimes <- initial %>%
  select(
    state,
    share_non_white,
    share_voters_voted_trump,
    hate_crimes_per_100k_splc
  ) %>%
  filter(
    hate_crimes_per_100k_splc > 0,
    state != "District of Columbia"
  ) %>%
  mutate(
    ElectTrump = case_when(
      share_voters_voted_trump >= 0.5 ~ 1,
      share_voters_voted_trump < 0.5 ~ 0
    )
  ) %>%
  as.data.frame()
Research question
Is there a relationship between the hate crimes reported during November 9-18, 2016, and (i) the presence of minority groups and (ii) the presidential candidate chosen, per state across the US?
Cases
There are 51 cases in total (the 50 states plus the District of Columbia). Four of them have the value NA for the variable hate_crimes_per_100k_splc, and the District of Columbia is excluded, so only 46 cases are considered.
Data collection
The data was collected from the GitHub account of FiveThirtyEight.
Type of study
This is an observational study
Response
The response variable is hate_crimes_per_100k_splc (hate crimes per 100,000 population, as reported by the Southern Poverty Law Center); it is numerical.
Explanatory
Explanatory variables will be
i) Whether the state voted Blue or Red (ElectTrump), which is categorical
ii) The share of the population that is non-white (share_non_white), which is numerical
Relevant summary statistics
# Print the raw tibble as read from the CSV.
initial
## # A tibble: 51 x 12
## state median_househol~ share_unemploye~ share_populatio~ share_populatio~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alab~ 42278 0.06 0.64 0.821
## 2 Alas~ 67629 0.064 0.63 0.914
## 3 Ariz~ 49254 0.063 0.9 0.842
## 4 Arka~ 44922 0.052 0.69 0.824
## 5 Cali~ 60487 0.059 0.97 0.806
## 6 Colo~ 60940 0.04 0.8 0.893
## 7 Conn~ 70161 0.052 0.94 0.886
## 8 Dela~ 57522 0.049 0.9 0.874
## 9 Dist~ 68277 0.067 1 0.871
## 10 Flor~ 46140 0.052 0.96 0.853
## # ... with 41 more rows, and 7 more variables: share_non_citizen <dbl>,
## # share_white_poverty <dbl>, gini_index <dbl>, share_non_white <dbl>,
## # share_voters_voted_trump <dbl>, hate_crimes_per_100k_splc <dbl>,
## # avg_hatecrimes_per_100k_fbi <dbl>
# Print the filtered data frame, including the derived ElectTrump column.
hate_crimes
## state share_non_white share_voters_voted_trump
## 1 Alabama 0.35 0.63
## 2 Alaska 0.42 0.53
## 3 Arizona 0.49 0.50
## 4 Arkansas 0.26 0.60
## 5 California 0.61 0.33
## 6 Colorado 0.31 0.44
## 7 Connecticut 0.30 0.41
## 8 Delaware 0.37 0.42
## 9 Florida 0.46 0.49
## 10 Georgia 0.48 0.51
## 11 Idaho 0.16 0.59
## 12 Illinois 0.37 0.39
## 13 Indiana 0.20 0.57
## 14 Iowa 0.15 0.52
## 15 Kansas 0.25 0.57
## 16 Kentucky 0.15 0.63
## 17 Louisiana 0.42 0.58
## 18 Maine 0.09 0.45
## 19 Maryland 0.50 0.35
## 20 Massachusetts 0.27 0.34
## 21 Michigan 0.24 0.48
## 22 Minnesota 0.18 0.45
## 23 Mississippi 0.44 0.58
## 24 Missouri 0.20 0.57
## 25 Montana 0.10 0.57
## 26 Nebraska 0.21 0.60
## 27 Nevada 0.50 0.46
## 28 New Hampshire 0.09 0.47
## 29 New Jersey 0.44 0.42
## 30 New Mexico 0.62 0.40
## 31 New York 0.42 0.37
## 32 North Carolina 0.38 0.51
## 33 Ohio 0.21 0.52
## 34 Oklahoma 0.35 0.65
## 35 Oregon 0.26 0.41
## 36 Pennsylvania 0.24 0.49
## 37 Rhode Island 0.28 0.40
## 38 South Carolina 0.36 0.55
## 39 Tennessee 0.27 0.61
## 40 Texas 0.56 0.53
## 41 Utah 0.19 0.47
## 42 Vermont 0.06 0.33
## 43 Virginia 0.38 0.45
## 44 Washington 0.31 0.38
## 45 West Virginia 0.07 0.69
## 46 Wisconsin 0.22 0.48
## hate_crimes_per_100k_splc ElectTrump
## 1 0.12583893 1
## 2 0.14374012 1
## 3 0.22531995 1
## 4 0.06906077 1
## 5 0.25580536 0
## 6 0.39052330 0
## 7 0.33539227 0
## 8 0.32275417 0
## 9 0.18752122 0
## 10 0.12042027 1
## 11 0.12420817 1
## 12 0.19534455 0
## 13 0.24700888 1
## 14 0.45442742 1
## 15 0.10515247 1
## 16 0.32439697 1
## 17 0.10973335 1
## 18 0.61557402 0
## 19 0.37043897 0
## 20 0.63081059 0
## 21 0.40377937 0
## 22 0.62747993 0
## 23 0.06744680 1
## 24 0.18452351 1
## 25 0.49549103 1
## 26 0.15948963 1
## 27 0.14167316 0
## 28 0.15154960 0
## 29 0.07830591 0
## 30 0.29481132 0
## 31 0.35062045 0
## 32 0.24400659 1
## 33 0.19071396 1
## 34 0.13362910 1
## 35 0.83284961 0
## 36 0.28510109 0
## 37 0.09540164 0
## 38 0.20989442 1
## 39 0.19993848 1
## 40 0.21358394 1
## 41 0.13654673 0
## 42 0.32414911 0
## 43 0.36324890 0
## 44 0.67748765 0
## 45 0.32867707 1
## 46 0.22619711 0
# Bar chart of SPLC hate crimes per 100k by state, coloured by ElectTrump.
# ElectTrump is a 0/1 indicator, so it is mapped as a discrete factor with a
# manual two-colour scale (0 = blue, 1 = red) instead of a continuous gradient,
# which would draw a colourbar legend for a categorical variable.
ggplot(
  hate_crimes,
  aes(x = state, y = hate_crimes_per_100k_splc, fill = factor(ElectTrump))
) +
  geom_col() +
  scale_fill_manual(
    name = "ElectTrump",
    values = c("0" = "blue", "1" = "red")
  ) +
  # Rotate the state labels so they remain legible along the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Mean SPLC hate-crime rate per 100k for each ElectTrump group.
# NOTE(review): the name `summary` shadows base::summary as a variable; it is
# kept here because the following line prints it by this name.
summary <- hate_crimes %>%
  group_by(ElectTrump) %>%
  summarize(MeanHateCrimesPer100k = mean(hate_crimes_per_100k_splc))
## `summarise()` ungrouping output (override with `.groups` argument)
# Display the mean hate-crime rate per ElectTrump group.
summary
## # A tibble: 2 x 2
## ElectTrump MeanHateCrimesPer100k
## <dbl> <dbl>
## 1 0 0.346
## 2 1 0.203