In my project I want to analyse whether states where a greater share of the population voted for Trump in 2016 have a higher rate of hate crimes.
I also want to find out whether income inequality affects the rate of hate crimes.
The data contains 51 cases (50 US States and District of Columbia) and 12 variables.
Pre-election data were collected by the FBI Uniform Crime Reporting Program from law enforcement agencies.
Post-election data were collected by the Southern Poverty Law Center, which used media accounts and people’s self-reports.
This is an observational study.
Data is taken from: https://github.com/fivethirtyeight/data/tree/master/hate-crimes Sources: Kaiser Family Foundation, Census Bureau, United States Elections Project, Southern Poverty Law Center, FBI
Average annual hate crimes - numerical variable
Share of the population who voted for Trump in 2016 - numerical variable
Median annual household income - numerical variable
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Load the FiveThirtyEight hate-crimes table straight from GitHub, keeping the
# column names exactly as they appear in the CSV header.
data <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/hate-crimes/hate_crimes.csv",
  header = TRUE,
  check.names = FALSE
)
# Distribution summary of median household income.
summary(data[["median_household_income"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35521 48657 54916 55224 60719 76165
# Distribution summary of the share of voters who voted for Trump.
summary(data[["share_voters_voted_trump"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.040 0.415 0.490 0.490 0.575 0.700
# Distribution summary of the FBI average annual hate crimes per 100k
# (the summary also reports how many values are NA).
summary(data[["avg_hatecrimes_per_100k_fbi"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.2669 1.2931 1.9871 2.3676 3.1843 10.9535 1
# Distribution summary of the SPLC hate crimes per 100k
# (the summary also reports how many values are NA).
summary(data[["hate_crimes_per_100k_splc"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.06745 0.14271 0.22620 0.30409 0.35694 1.52230 4
# Scatter plot of SPLC hate crimes per 100k against the share of voters who
# voted for Trump. A single point colour is used on purpose: a multi-element
# `col` vector is recycled over the points in row order, so the previous
# c("black", "yellow") merely alternated colours without encoding any variable.
# Explicit axis labels replace the default raw `data$...` expressions.
plot(
  x = data$share_voters_voted_trump,
  y = data$hate_crimes_per_100k_splc,
  col = "black",
  xlab = "Share of voters who voted for Trump",
  ylab = "Hate crimes per 100,000 population (SPLC)"
)
# Per-state bar chart of SPLC hate crimes per 100k. Each bar is filled by the
# state's Trump vote share on a black (low) to yellow (high) gradient, and the
# state labels are rotated so they stay readable.
crp <- ggplot(
  data,
  aes(x = state, y = hate_crimes_per_100k_splc, fill = share_voters_voted_trump)
) +
  geom_col() +
  theme(axis.text.x.bottom = element_text(angle = 90, hjust = 1)) +
  scale_fill_gradient(low = "black", high = "yellow")
crp
## Warning: Removed 4 rows containing missing values (position_stack).
# Keep only the state name and the two hate-crime rate columns. The columns are
# selected by name rather than by position so the selection does not silently
# pick up the wrong variables if the upstream CSV column order changes.
crstates <- data[, c(
  "state",
  "hate_crimes_per_100k_splc",
  "avg_hatecrimes_per_100k_fbi"
)]
State <- crstates$state
Pre_Elec <- crstates$avg_hatecrimes_per_100k_fbi  # FBI figures
Post_Elec <- crstates$hate_crimes_per_100k_splc   # SPLC figures
# Plotting frame for the per-state FBI bar chart.
dataAnalysis <- data.frame(State, Pre_Elec)
# Average annual hate crimes per 100,000 population, FBI, 2010-2015 per state.
# Bars are outlined in red; state labels are rotated 90 degrees for legibility.
ggplot(dataAnalysis, aes(x = State, y = Pre_Elec)) +
  geom_col(color = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 1 rows containing missing values (position_stack).
## Hate crimes per 100,000 population, SPLC, Nov. 9-18, 2016 by state
# The plotting frame is built here so that Post_Elec is an actual column of the
# data handed to ggplot. `dataAnalysis` only holds State and Pre_Elec, so the
# earlier call resolved Post_Elec from the surrounding environment instead,
# which breaks silently if that vector is later changed independently.
ggplot(data.frame(State, Post_Elec), aes(x = State, y = Post_Elec)) +
  geom_col(color = "yellow") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 4 rows containing missing values (position_stack).
# Gather median household income and both hate-crime rates into one frame,
# replacing the earlier per-state `dataAnalysis`.
income <- data[["median_household_income"]]
Pre_Elec <- data[["avg_hatecrimes_per_100k_fbi"]]
Post_Elec <- data[["hate_crimes_per_100k_splc"]]
dataAnalysis <- data.frame(income, Pre_Elec, Post_Elec)
# Histogram of median household income.
hist(
  data[["median_household_income"]],
  main = "Histogram for Median Income",
  xlab = "Median Income",
  col = "grey"
)
# Histogram of the FBI hate-crime rate, followed by psych's descriptive
# statistics for the same variable.
hist(
  data[["avg_hatecrimes_per_100k_fbi"]],
  main = "Average annual hate crimes per 100,000 population, FBI, 2010-2015",
  xlab = "Average annual hate crimes",
  col = "grey"
)
describe(data[["avg_hatecrimes_per_100k_fbi"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 50 2.37 1.71 1.99 2.17 1.37 0.27 10.95 10.69 2.49 10.08
## se
## X1 0.24
# Histogram of the SPLC hate-crime rate, followed by psych's descriptive
# statistics for the same variable.
hist(
  data[["hate_crimes_per_100k_splc"]],
  main = "Hate crimes per 100,000 population, SPLC, Nov. 9-18, 2016",
  xlab = "Hate crimes per 100,000 population",
  col = "grey"
)
describe(data[["hate_crimes_per_100k_splc"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 47 0.3 0.25 0.23 0.26 0.15 0.07 1.52 1.45 2.64 9.29
## se
## X1 0.04
`