# Import the hate crimes CSV dataset into the data frame `hate`.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# only runs where the file exists at exactly this location.
hate <- read.csv(
  "C:\\Users\\lizza\\Documents\\CUNY - Data Analytics\\DATA 606 - Probablity and Statistics\\Final Project\\hate_crimes.csv",
  header = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
)
Number of observations & variables
dim(hate)
## [1] 51 12
Median household income, 2016/ Share of the population that is unemployed (seasonally adjusted)/ Share of the population that lives in metropolitan areas, 2015/ Share of adults 25 and older with a high-school degree, 2009/ Share of the population that are not U.S. citizens, 2015/ Share of white residents who are living in poverty, 2015/ Gini Index, 2015/ Share of the population that is not white, 2015/ Share of 2016 U.S. presidential voters who voted for Donald Trump/ Hate crimes per 100,000 population, Southern Poverty Law Center, Nov 9-18, 2016/ Average annual hate crimes per 100,000 population, FBI, 2010-2015/
names(hate)
## [1] "state"
## [2] "median_household_income"
## [3] "share_unemployed_seasonal"
## [4] "share_population_in_metro_areas"
## [5] "share_population_with_high_school_degree"
## [6] "share_non_citizen"
## [7] "share_white_poverty"
## [8] "gini_index"
## [9] "share_non_white"
## [10] "share_voters_voted_trump"
## [11] "hate_crimes_per_100k_splc"
## [12] "avg_hatecrimes_per_100k_fbi"
View the Heads & Tails of the Dataset
head(hate)
## state median_household_income share_unemployed_seasonal
## 1 Alabama 42278 0.060
## 2 Alaska 67629 0.064
## 3 Arizona 49254 0.063
## 4 Arkansas 44922 0.052
## 5 California 60487 0.059
## 6 Colorado 60940 0.040
## share_population_in_metro_areas share_population_with_high_school_degree
## 1 0.64 0.821
## 2 0.63 0.914
## 3 0.90 0.842
## 4 0.69 0.824
## 5 0.97 0.806
## 6 0.80 0.893
## share_non_citizen share_white_poverty gini_index share_non_white
## 1 0.02 0.12 0.472 0.35
## 2 0.04 0.06 0.422 0.42
## 3 0.10 0.09 0.455 0.49
## 4 0.04 0.12 0.458 0.26
## 5 0.13 0.09 0.471 0.61
## 6 0.06 0.07 0.457 0.31
## share_voters_voted_trump hate_crimes_per_100k_splc
## 1 0.63 0.12583893
## 2 0.53 0.14374012
## 3 0.50 0.22531995
## 4 0.60 0.06906077
## 5 0.33 0.25580536
## 6 0.44 0.39052330
## avg_hatecrimes_per_100k_fbi
## 1 1.8064105
## 2 1.6567001
## 3 3.4139280
## 4 0.8692089
## 5 2.3979859
## 6 2.8046888
tail(hate)
## state median_household_income share_unemployed_seasonal
## 46 Vermont 60708 0.037
## 47 Virginia 66155 0.043
## 48 Washington 59068 0.052
## 49 West Virginia 39552 0.073
## 50 Wisconsin 58080 0.043
## 51 Wyoming 55690 0.040
## share_population_in_metro_areas
## 46 0.35
## 47 0.89
## 48 0.86
## 49 0.55
## 50 0.69
## 51 0.31
## share_population_with_high_school_degree share_non_citizen
## 46 0.910 0.01
## 47 0.866 0.06
## 48 0.897 0.08
## 49 0.828 0.01
## 50 0.898 0.03
## 51 0.918 0.02
## share_white_poverty gini_index share_non_white share_voters_voted_trump
## 46 0.10 0.444 0.06 0.33
## 47 0.07 0.459 0.38 0.45
## 48 0.09 0.441 0.31 0.38
## 49 0.14 0.451 0.07 0.69
## 50 0.09 0.430 0.22 0.48
## 51 0.09 0.423 0.15 0.70
## hate_crimes_per_100k_splc avg_hatecrimes_per_100k_fbi
## 46 0.3241491 1.9030814
## 47 0.3632489 1.7247546
## 48 0.6774876 3.8177403
## 49 0.3286771 2.0370536
## 50 0.2261971 1.1219447
## 51 NA 0.2669408
Run basic descriptive statistics on the following variables: median_household_income, hate_crimes_per_100k_splc, avg_hatecrimes_per_100k_fbi
Median Household Income, 2016
summary(hate$median_household_income)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35521 48657 54916 55224 60719 76165
Hate Crimes Per 100,000 residents, 2016
summary(hate$hate_crimes_per_100k_splc)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.06745 0.14271 0.22620 0.30409 0.35694 1.52230 4
Average Annual Hate Crimes per 100,000 residents, 2010-2015
summary(hate$avg_hatecrimes_per_100k_fbi)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.2669 1.2931 1.9871 2.3676 3.1843 10.9535 1
# Count the states at or below (FALSE) and above (TRUE) the median household
# income. The median is computed from the data instead of being typed in by
# hand, so the split stays correct if the dataset changes.
table(
  hate$median_household_income >
    median(hate$median_household_income, na.rm = TRUE)
)
##
## FALSE TRUE
## 26 25
5 States w/the Lowest Median Household Incomes
# Keep only the state name and its median household income
income <- hate[, c("state", "median_household_income")]
# Order the rows from lowest to highest median household income
income_sort <- income[order(income$median_household_income), ]
# First five rows = the five lowest-income states
head(income_sort, n = 5)
## state median_household_income
## 25 Mississippi 35521
## 49 West Virginia 39552
## 1 Alabama 42278
## 19 Louisiana 42406
## 18 Kentucky 42786
The 5 States w/the Highest Median Household Incomes
tail(income_sort,n=5)
## state median_household_income
## 9 District of Columbia 68277
## 7 Connecticut 70161
## 12 Hawaii 71223
## 30 New Hampshire 73397
## 21 Maryland 76165
We can conclude that Maryland has the largest median household income with 76165 while Mississippi has the smallest median household income with 35521.
The 5 States w/the Lowest Rate of White Poverty
# Keep only the state name and its share of white residents in poverty
poverty <- hate[, c("state", "share_white_poverty")]
# Order the rows from lowest to highest share of white poverty
poverty_sort <- poverty[order(poverty$share_white_poverty), ]
# First five rows = the five states with the lowest share
head(poverty_sort, n = 5)
## state share_white_poverty
## 9 District of Columbia 0.04
## 24 Minnesota 0.05
## 2 Alaska 0.06
## 7 Connecticut 0.06
## 21 Maryland 0.06
The 5 States w/the Highest Rate of White Poverty
tail(poverty_sort,n=5)
## state share_white_poverty
## 20 Maine 0.12
## 43 Tennessee 0.13
## 25 Mississippi 0.14
## 49 West Virginia 0.14
## 18 Kentucky 0.17
The District of Columbia had the lowest share of white residents living in poverty (0.04), while Kentucky had the highest (0.17).
The 5 States w/the Lowest Population of Trump Voters
# Keep only the state name and its share of voters who voted for Trump
trump <- hate[, c("state", "share_voters_voted_trump")]
# Order the rows from lowest to highest share of Trump voters
trump_sort <- trump[order(trump$share_voters_voted_trump), ]
# First five rows = the five states with the lowest share
head(trump_sort, n = 5)
## state share_voters_voted_trump
## 9 District of Columbia 0.04
## 12 Hawaii 0.30
## 5 California 0.33
## 46 Vermont 0.33
## 22 Massachusetts 0.34
The 5 States w/the Highest Population of Trump Voters
tail(trump_sort,n=5)
## state share_voters_voted_trump
## 18 Kentucky 0.63
## 35 North Dakota 0.64
## 37 Oklahoma 0.65
## 49 West Virginia 0.69
## 51 Wyoming 0.70
The District of Columbia had the lowest share of Trump voters (0.04), while Wyoming had the highest (0.70).
Histograms of the Non-Citizen Share in Lower- and Higher-Income States
# Split the states into two income groups at the median household income.
# The threshold is computed rather than hard-coded, and the lower group uses
# `<=` so the state sitting exactly on the median is kept: with `<` and `>`
# on both sides that state was silently dropped from both groups.
income_median <- median(hate$median_household_income, na.rm = TRUE)
lower_income <- subset(hate, median_household_income <= income_median)
higher_income <- subset(hate, median_household_income > income_median)
Compare Non-Citizens in Lower Income States & Higher Income States
# Histogram of the non-citizen share among the lower-income states.
# "beige" is the same colour as colors()[18].
hist(
  lower_income$share_non_citizen,
  main = "Population of Non-Citizens in Lower Income States",
  xlab = "Population",
  col = "beige"
)
# Same histogram for the higher-income states.
hist(
  higher_income$share_non_citizen,
  main = "Population of Non-Citizens in Higher Income States",
  xlab = "Population",
  col = "beige"
)
Creating Box Plots for Hate Crimes Per 100K
# Notched, horizontal box plots of hate_crimes_per_100k_splc, one per income
# group. TRUE is spelled out because `T` is an ordinary, reassignable variable.
boxplot(
  lower_income$hate_crimes_per_100k_splc,
  col = "beige",
  notch = TRUE,
  horizontal = TRUE,
  main = "Hate Crimes Per 100K Residents",
  xlab = "Population of Lower Income States"
)
boxplot(
  higher_income$hate_crimes_per_100k_splc,
  col = "beige",
  notch = TRUE,
  horizontal = TRUE,
  main = "Hate Crimes Per 100K Residents",
  xlab = "Population of Higher Income States"
)
Creating Box Plots for Average Hate Crimes Per 100K
# Notched, horizontal box plots of avg_hatecrimes_per_100k_fbi, one per income
# group. TRUE is spelled out because `T` is an ordinary, reassignable variable.
boxplot(
  lower_income$avg_hatecrimes_per_100k_fbi,
  col = "beige",
  notch = TRUE,
  horizontal = TRUE,
  main = "Average Hate Crimes Per 100K/FBI Statistics",
  xlab = "Population of Lower Income States"
)
boxplot(
  higher_income$avg_hatecrimes_per_100k_fbi,
  col = "beige",
  notch = TRUE,
  horizontal = TRUE,
  main = "Average Hate Crimes Per 100K/FBI Statistics",
  xlab = "Population of Higher Income States"
)
As per research, the Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measure of inequality.
Gini Index
# Normal Q-Q plot, with reference line, of the Gini index in each income group.
with(lower_income, {
  qqnorm(gini_index, main = "Gini Index: Lower Population")
  qqline(gini_index)
})
with(higher_income, {
  qqnorm(gini_index, main = "Gini Index: Higher Population")
  qqline(gini_index)
})
Trump Voters
# Normal Q-Q plot, with reference line, of the Trump vote share in each
# income group.
with(lower_income, {
  qqnorm(
    share_voters_voted_trump,
    main = "Lower Income Population: Trump Voters"
  )
  qqline(share_voters_voted_trump)
})
with(higher_income, {
  qqnorm(
    share_voters_voted_trump,
    main = "Higher Income Population: Trump Voters"
  )
  qqline(share_voters_voted_trump)
})
Adults Over 25 w/High School Diploma
# Normal Q-Q plot, with reference line, of the high-school-degree share in the
# lower-income states. The plot title previously read "AdultsOver 25"; the
# missing space is restored.
qqnorm(
  lower_income$share_population_with_high_school_degree,
  main = "Lower Income Population: Adults Over 25 w/HS Degree"
)
qqline(lower_income$share_population_with_high_school_degree)
#Confidence Intervals
Average Hate Crimes, FBI
# Approximate 95% confidence interval for the mean FBI hate-crime rate, based
# on a random sample of 20 states.
fbi <- hate$avg_hatecrimes_per_100k_fbi
# The FBI column contains a missing value. Drop NAs before sampling; otherwise
# any sample that happens to draw the NA makes the mean, the standard error
# and both interval bounds NA.
samp_fbi <- sample(fbi[!is.na(fbi)], 20)
sample_mean_fbi <- mean(samp_fbi)
# Standard error of the sample mean; the size is taken from the sample itself
# so it cannot drift out of step with the sample() call above.
se <- sd(samp_fbi) / sqrt(length(samp_fbi))
# 1.96 is the normal critical value for a 95% interval.
lower <- sample_mean_fbi - 1.96 * se
upper <- sample_mean_fbi + 1.96 * se
c(lower, upper)
## [1] 1.282219 3.307578
Median Household Income
# Approximate 95% confidence interval for the mean of the median household
# income, based on a random sample of 20 states.
mhi <- hate$median_household_income
samp_mhi <- sample(mhi, 20)
sample_mean_mhi <- mean(samp_mhi)
# Standard error of the sample mean (the sample always holds 20 values)
se <- sd(samp_mhi) / sqrt(length(samp_mhi))
# Interval bounds: mean -/+ 1.96 standard errors
lower <- sample_mean_mhi - 1.96 * se
upper <- sample_mean_mhi + 1.96 * se
c(lower, upper)
## [1] 52110.25 59617.85
The correlation between the median household income and the population of adults with a high school degree
cor(hate$median_household_income,hate$share_population_with_high_school_degree)
## [1] 0.65349
# Simple linear regression of the high-school-degree share on median household
# income. The formula uses bare column names resolved through `data = hate`.
# Writing `hate$...` inside the formula bypasses `data`, gives the coefficient
# the label `hate$median_household_income`, and makes predict(newdata = ...)
# ignore the new data.
inc_deg <- lm(
  share_population_with_high_school_degree ~ median_household_income,
  data = hate
)
summary(inc_deg)
##
## Call:
## lm(formula = hate$share_population_with_high_school_degree ~
## hate$median_household_income, data = hate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.075845 -0.017330 -0.000675 0.018116 0.048849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.356e-01 2.240e-02 32.846 < 2e-16 ***
## hate$median_household_income 2.418e-06 4.001e-07 6.043 2.01e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02605 on 49 degrees of freedom
## Multiple R-squared: 0.427, Adjusted R-squared: 0.4154
## F-statistic: 36.52 on 1 and 49 DF, p-value: 2.007e-07
# Scatter plot of the high-school-degree share (y) against median household
# income (x), with the fitted line from inc_deg drawn on top.
plot(
  hate$median_household_income,
  hate$share_population_with_high_school_degree
)
abline(inc_deg)