library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)

Data Preparation

# Load the FiveThirtyEight hate-crimes data set directly from GitHub.
initial <- read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/hate-crimes/hate_crimes.csv"
)
## Parsed with column specification:
## cols(
##   state = col_character(),
##   median_household_income = col_double(),
##   share_unemployed_seasonal = col_double(),
##   share_population_in_metro_areas = col_double(),
##   share_population_with_high_school_degree = col_double(),
##   share_non_citizen = col_double(),
##   share_white_poverty = col_double(),
##   gini_index = col_double(),
##   share_non_white = col_double(),
##   share_voters_voted_trump = col_double(),
##   hate_crimes_per_100k_splc = col_double(),
##   avg_hatecrimes_per_100k_fbi = col_double()
## )

Create a data frame and add a column ElectTrump, where 1 means the state voted for Trump and 0 means it did not.

# Build the analysis data frame from the raw data:
#   * keep the state, the two explanatory inputs and the SPLC hate-crime rate;
#   * keep only states with a positive SPLC rate (filter() also discards rows
#     whose condition is NA, so states with a missing rate are dropped here);
#   * exclude the District of Columbia.
# ElectTrump is 1 when Trump's vote share is at least 0.5 and 0 when it is
# below 0.5. A share of exactly 0.5 (Arizona in this data) is covered by `>=`,
# so no row needs to be patched afterwards by a hard-coded row number, which
# would silently hit the wrong state if the row order or filters ever changed.
hate_crimes <- initial %>%
  select(
    state,
    share_non_white,
    share_voters_voted_trump,
    hate_crimes_per_100k_splc
  ) %>%
  filter(hate_crimes_per_100k_splc > 0) %>%
  filter(state != "District of Columbia") %>%
  mutate(
    ElectTrump = case_when(
      share_voters_voted_trump >= 0.5 ~ 1,
      share_voters_voted_trump < 0.5 ~ 0
    )
  ) %>%
  data.frame()

Research question

Is there a relationship between hate crimes during the period of November 9-18, 2016, and the presence of minority groups and the president-elect chosen by each state across the US?

Cases

There are 51 cases in total, of which 4 have the value NA for the variable hate_crimes_per_100k_splc. The District of Columbia is also excluded during data preparation, so only 46 cases are considered.

Data collection

The data is collected from the GitHub account of FiveThirtyEight.

Type of study

This is an observational study

Data Source

Response

The response variable is hate_crimes_per_100k_splc (hate crimes per 100,000 population, per the Southern Poverty Law Center); it is numerical.

Explanatory

Explanatory variables will be

i) Whether the state was Blue or Red (ElectTrump), which is categorical

ii) Share of the state's population that is non-white (share_non_white), which is numerical

Relevant summary statistics

# Print the raw tibble as read from the CSV (51 rows, 12 columns).
initial
## # A tibble: 51 x 12
##    state median_househol~ share_unemploye~ share_populatio~ share_populatio~
##    <chr>            <dbl>            <dbl>            <dbl>            <dbl>
##  1 Alab~            42278            0.06              0.64            0.821
##  2 Alas~            67629            0.064             0.63            0.914
##  3 Ariz~            49254            0.063             0.9             0.842
##  4 Arka~            44922            0.052             0.69            0.824
##  5 Cali~            60487            0.059             0.97            0.806
##  6 Colo~            60940            0.04              0.8             0.893
##  7 Conn~            70161            0.052             0.94            0.886
##  8 Dela~            57522            0.049             0.9             0.874
##  9 Dist~            68277            0.067             1               0.871
## 10 Flor~            46140            0.052             0.96            0.853
## # ... with 41 more rows, and 7 more variables: share_non_citizen <dbl>,
## #   share_white_poverty <dbl>, gini_index <dbl>, share_non_white <dbl>,
## #   share_voters_voted_trump <dbl>, hate_crimes_per_100k_splc <dbl>,
## #   avg_hatecrimes_per_100k_fbi <dbl>
# Print the filtered analysis data frame, including the ElectTrump column.
hate_crimes
##             state share_non_white share_voters_voted_trump
## 1         Alabama            0.35                     0.63
## 2          Alaska            0.42                     0.53
## 3         Arizona            0.49                     0.50
## 4        Arkansas            0.26                     0.60
## 5      California            0.61                     0.33
## 6        Colorado            0.31                     0.44
## 7     Connecticut            0.30                     0.41
## 8        Delaware            0.37                     0.42
## 9         Florida            0.46                     0.49
## 10        Georgia            0.48                     0.51
## 11          Idaho            0.16                     0.59
## 12       Illinois            0.37                     0.39
## 13        Indiana            0.20                     0.57
## 14           Iowa            0.15                     0.52
## 15         Kansas            0.25                     0.57
## 16       Kentucky            0.15                     0.63
## 17      Louisiana            0.42                     0.58
## 18          Maine            0.09                     0.45
## 19       Maryland            0.50                     0.35
## 20  Massachusetts            0.27                     0.34
## 21       Michigan            0.24                     0.48
## 22      Minnesota            0.18                     0.45
## 23    Mississippi            0.44                     0.58
## 24       Missouri            0.20                     0.57
## 25        Montana            0.10                     0.57
## 26       Nebraska            0.21                     0.60
## 27         Nevada            0.50                     0.46
## 28  New Hampshire            0.09                     0.47
## 29     New Jersey            0.44                     0.42
## 30     New Mexico            0.62                     0.40
## 31       New York            0.42                     0.37
## 32 North Carolina            0.38                     0.51
## 33           Ohio            0.21                     0.52
## 34       Oklahoma            0.35                     0.65
## 35         Oregon            0.26                     0.41
## 36   Pennsylvania            0.24                     0.49
## 37   Rhode Island            0.28                     0.40
## 38 South Carolina            0.36                     0.55
## 39      Tennessee            0.27                     0.61
## 40          Texas            0.56                     0.53
## 41           Utah            0.19                     0.47
## 42        Vermont            0.06                     0.33
## 43       Virginia            0.38                     0.45
## 44     Washington            0.31                     0.38
## 45  West Virginia            0.07                     0.69
## 46      Wisconsin            0.22                     0.48
##    hate_crimes_per_100k_splc ElectTrump
## 1                 0.12583893          1
## 2                 0.14374012          1
## 3                 0.22531995          1
## 4                 0.06906077          1
## 5                 0.25580536          0
## 6                 0.39052330          0
## 7                 0.33539227          0
## 8                 0.32275417          0
## 9                 0.18752122          0
## 10                0.12042027          1
## 11                0.12420817          1
## 12                0.19534455          0
## 13                0.24700888          1
## 14                0.45442742          1
## 15                0.10515247          1
## 16                0.32439697          1
## 17                0.10973335          1
## 18                0.61557402          0
## 19                0.37043897          0
## 20                0.63081059          0
## 21                0.40377937          0
## 22                0.62747993          0
## 23                0.06744680          1
## 24                0.18452351          1
## 25                0.49549103          1
## 26                0.15948963          1
## 27                0.14167316          0
## 28                0.15154960          0
## 29                0.07830591          0
## 30                0.29481132          0
## 31                0.35062045          0
## 32                0.24400659          1
## 33                0.19071396          1
## 34                0.13362910          1
## 35                0.83284961          0
## 36                0.28510109          0
## 37                0.09540164          0
## 38                0.20989442          1
## 39                0.19993848          1
## 40                0.21358394          1
## 41                0.13654673          0
## 42                0.32414911          0
## 43                0.36324890          0
## 44                0.67748765          0
## 45                0.32867707          1
## 46                0.22619711          0
# Bar chart of the SPLC hate-crime rate per state. Bars are filled by
# ElectTrump on a blue (low) to red (high) gradient, and the state labels on
# the x axis are rotated 90 degrees so they do not overlap.
ggplot(
  data = hate_crimes,
  mapping = aes(x = state, y = hate_crimes_per_100k_splc, fill = ElectTrump)
) +
  geom_col() +
  scale_fill_gradient(low = "blue", high = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Mean SPLC hate-crime rate per 100k population for each ElectTrump group.
# Grouping and summarising only touch the two columns involved, so no prior
# column selection is needed.
summary <- hate_crimes %>%
  group_by(ElectTrump) %>%
  summarise(MeanHateCrimesPer100k = mean(hate_crimes_per_100k_splc))
## `summarise()` ungrouping output (override with `.groups` argument)
# Print the per-group mean hate-crime rates.
summary
## # A tibble: 2 x 2
##   ElectTrump MeanHateCrimesPer100k
##        <dbl>                 <dbl>
## 1          0                 0.346
## 2          1                 0.203