Hate-Crimes Data

# Import the hate crimes CSV dataset into the `hate` data frame.
# NOTE(review): this absolute Windows path only resolves on the original
# author's machine; point it at a local copy of hate_crimes.csv to re-run.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable name.
hate <- read.csv("C:\\Users\\lizza\\Documents\\CUNY - Data Analytics\\DATA 606 - Probablity and Statistics\\Final Project\\hate_crimes.csv", header = TRUE)

Dimensions & Names of the Hate dataset

Number of observations & variables

# Row count (observations) followed by column count (variables)
c(nrow(hate), ncol(hate))
## [1] 51 12

Variable Description & Names

Median household income, 2016/ Share of the population that is unemployed (seasonally adjusted)/ Share of the population that lives in metropolitan areas, 2015/ Share of adults 25 and older with a high-school degree, 2009/ Share of the population that are not U.S. citizens, 2015/ Share of white residents who are living in poverty, 2015/ Gini Index, 2015/ Share of the population that is not white, 2015/ Share of 2016 U.S. presidential voters who voted for Donald Trump/ Hate crimes per 100,000 population, Southern Poverty Law Center, Nov 9-18, 2016/ Average annual hate crimes per 100,000 population, FBI, 2010-2015/

# List the column (variable) names of the data frame
colnames(hate)
##  [1] "state"                                   
##  [2] "median_household_income"                 
##  [3] "share_unemployed_seasonal"               
##  [4] "share_population_in_metro_areas"         
##  [5] "share_population_with_high_school_degree"
##  [6] "share_non_citizen"                       
##  [7] "share_white_poverty"                     
##  [8] "gini_index"                              
##  [9] "share_non_white"                         
## [10] "share_voters_voted_trump"                
## [11] "hate_crimes_per_100k_splc"               
## [12] "avg_hatecrimes_per_100k_fbi"

View the Heads & Tails of the Dataset

# Show the first six rows of the dataset
head(hate, n = 6L)
##        state median_household_income share_unemployed_seasonal
## 1    Alabama                   42278                     0.060
## 2     Alaska                   67629                     0.064
## 3    Arizona                   49254                     0.063
## 4   Arkansas                   44922                     0.052
## 5 California                   60487                     0.059
## 6   Colorado                   60940                     0.040
##   share_population_in_metro_areas share_population_with_high_school_degree
## 1                            0.64                                    0.821
## 2                            0.63                                    0.914
## 3                            0.90                                    0.842
## 4                            0.69                                    0.824
## 5                            0.97                                    0.806
## 6                            0.80                                    0.893
##   share_non_citizen share_white_poverty gini_index share_non_white
## 1              0.02                0.12      0.472            0.35
## 2              0.04                0.06      0.422            0.42
## 3              0.10                0.09      0.455            0.49
## 4              0.04                0.12      0.458            0.26
## 5              0.13                0.09      0.471            0.61
## 6              0.06                0.07      0.457            0.31
##   share_voters_voted_trump hate_crimes_per_100k_splc
## 1                     0.63                0.12583893
## 2                     0.53                0.14374012
## 3                     0.50                0.22531995
## 4                     0.60                0.06906077
## 5                     0.33                0.25580536
## 6                     0.44                0.39052330
##   avg_hatecrimes_per_100k_fbi
## 1                   1.8064105
## 2                   1.6567001
## 3                   3.4139280
## 4                   0.8692089
## 5                   2.3979859
## 6                   2.8046888
# Show the last six rows of the dataset
tail(hate, n = 6L)
##            state median_household_income share_unemployed_seasonal
## 46       Vermont                   60708                     0.037
## 47      Virginia                   66155                     0.043
## 48    Washington                   59068                     0.052
## 49 West Virginia                   39552                     0.073
## 50     Wisconsin                   58080                     0.043
## 51       Wyoming                   55690                     0.040
##    share_population_in_metro_areas
## 46                            0.35
## 47                            0.89
## 48                            0.86
## 49                            0.55
## 50                            0.69
## 51                            0.31
##    share_population_with_high_school_degree share_non_citizen
## 46                                    0.910              0.01
## 47                                    0.866              0.06
## 48                                    0.897              0.08
## 49                                    0.828              0.01
## 50                                    0.898              0.03
## 51                                    0.918              0.02
##    share_white_poverty gini_index share_non_white share_voters_voted_trump
## 46                0.10      0.444            0.06                     0.33
## 47                0.07      0.459            0.38                     0.45
## 48                0.09      0.441            0.31                     0.38
## 49                0.14      0.451            0.07                     0.69
## 50                0.09      0.430            0.22                     0.48
## 51                0.09      0.423            0.15                     0.70
##    hate_crimes_per_100k_splc avg_hatecrimes_per_100k_fbi
## 46                 0.3241491                   1.9030814
## 47                 0.3632489                   1.7247546
## 48                 0.6774876                   3.8177403
## 49                 0.3286771                   2.0370536
## 50                 0.2261971                   1.1219447
## 51                        NA                   0.2669408

Run basic descriptive statistics on the following variables: median_household_income, hate_crimes_per_100k_splc, avg_hatecrimes_per_100k_fbi

Median Household Income, 2016

# Five-number summary plus mean of median household income
summary(hate[["median_household_income"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35521   48657   54916   55224   60719   76165

Hate Crimes Per 100,000 residents, 2016

# Five-number summary plus mean (and NA count) of the SPLC hate-crime rate
summary(hate[["hate_crimes_per_100k_splc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.06745 0.14271 0.22620 0.30409 0.35694 1.52230       4

Average Annual Hate Crimes per 100,000 residents, 2010-2015

# Five-number summary plus mean (and NA count) of the FBI hate-crime rate
summary(hate[["avg_hatecrimes_per_100k_fbi"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.2669  1.2931  1.9871  2.3676  3.1843 10.9535       1

Count how many states have a median household income above the overall median household income (54916)

# Count states whose median household income is strictly above (TRUE) or not
# above (FALSE) the median across states. The cutoff is computed from the data
# instead of being hand-copied from the summary() printout, so it cannot drift
# out of sync with the dataset.
table(
  hate$median_household_income >
    median(hate$median_household_income, na.rm = TRUE)
)
## 
## FALSE  TRUE 
##    26    25

Sort data to identify which states lead in several categories

5 States w/the Lowest Median Household Incomes

# Keep just the state name and its median household income
income <- hate[, c("state", "median_household_income")]
# Order the rows from lowest to highest income
income_sort <- income[order(income[["median_household_income"]]), ]
# First five rows = the five lowest-income states
head(income_sort, n = 5L)
##            state median_household_income
## 25   Mississippi                   35521
## 49 West Virginia                   39552
## 1        Alabama                   42278
## 19     Louisiana                   42406
## 18      Kentucky                   42786

The 5 States w/the Highest Median Household Incomes

# Last five rows of the ascending sort = the five highest-income states
tail(income_sort, n = 5L)
##                   state median_household_income
## 9  District of Columbia                   68277
## 7           Connecticut                   70161
## 12               Hawaii                   71223
## 30        New Hampshire                   73397
## 21             Maryland                   76165

We can conclude that Maryland has the largest median household income with 76165 while Mississippi has the smallest median household income with 35521.

The 5 States w/the Lowest Rate of White Poverty

# Keep just the state name and its share of white residents in poverty
poverty <- hate[, c("state", "share_white_poverty")]
# Order the rows from lowest to highest white-poverty share
poverty_sort <- poverty[order(poverty[["share_white_poverty"]]), ]
# First five rows = the five states with the lowest share
head(poverty_sort, n = 5L)
##                   state share_white_poverty
## 9  District of Columbia                0.04
## 24            Minnesota                0.05
## 2                Alaska                0.06
## 7           Connecticut                0.06
## 21             Maryland                0.06

The 5 States w/the Highest Rate of White Poverty

# Last five rows of the ascending sort = the five highest white-poverty shares
tail(poverty_sort, n = 5L)
##            state share_white_poverty
## 20         Maine                0.12
## 43     Tennessee                0.13
## 25   Mississippi                0.14
## 49 West Virginia                0.14
## 18      Kentucky                0.17

The District of Columbia had the lowest share of white residents living in poverty (0.04), while Kentucky had the highest share (0.17).

The 5 States w/the Lowest Population of Trump Voters

# Keep just the state name and its share of voters who voted for Trump
trump <- hate[, c("state", "share_voters_voted_trump")]
# Order the rows from lowest to highest Trump vote share
trump_sort <- trump[order(trump[["share_voters_voted_trump"]]), ]
# First five rows = the five states with the lowest Trump vote share
head(trump_sort, n = 5L)
##                   state share_voters_voted_trump
## 9  District of Columbia                     0.04
## 12               Hawaii                     0.30
## 5            California                     0.33
## 46              Vermont                     0.33
## 22        Massachusetts                     0.34

The 5 States w/the Highest Population of Trump Voters

# Last five rows of the ascending sort = the five highest Trump vote shares
tail(trump_sort, n = 5L)
##            state share_voters_voted_trump
## 18      Kentucky                     0.63
## 35  North Dakota                     0.64
## 37      Oklahoma                     0.65
## 49 West Virginia                     0.69
## 51       Wyoming                     0.70

The District of Columbia had the lowest share of Trump voters (0.04), while Wyoming had the highest share (0.70).

Create two datasets Where the Median Household Incomes are Above/Below 54916

Histograms for Lower/Higher States w/Non-Citizens

# Split the states into two groups around the median of median household
# income. The cutoff is computed from the data rather than hard-coded as 54916
# in two places.
# NOTE: both comparisons are strict, so any state whose income is exactly equal
# to the cutoff falls into neither subset.
income_cutoff <- median(hate$median_household_income, na.rm = TRUE)
lower_income <- subset(hate, median_household_income < income_cutoff)
higher_income <- subset(hate, median_household_income > income_cutoff)

Compare Non-Citizens in Lower Income States & Higher Income States

# Histogram of the non-citizen share across the lower-income states.
# (Author's alternatives left disabled: border = NA, col = colors()[18].)
hist(
  x = lower_income[["share_non_citizen"]],
  main = "Population of Non-Citizens in Lower Income States",
  xlab = "Population",
  col = "beige"
)

# Same histogram for the higher-income states.
hist(
  x = higher_income[["share_non_citizen"]],
  main = "Population of Non-Citizens in Higher Income States",
  xlab = "Population",
  col = "beige"
)

Creating Box Plots for Hate Crimes Per 100K

# Horizontal, notched box plot of the SPLC hate-crime rate for the
# lower-income states. TRUE is spelled out because `T` can be reassigned.
boxplot(lower_income$hate_crimes_per_100k_splc,
        col = "beige",
        notch = TRUE,
        horizontal = TRUE,
        main = "Hate Crimes Per 100K Residents",
        xlab = "Population of Lower Income States")

# Same plot for the higher-income states.
boxplot(higher_income$hate_crimes_per_100k_splc,
        col = "beige",
        notch = TRUE,
        horizontal = TRUE,
        main = "Hate Crimes Per 100K Residents",
        xlab = "Population of Higher Income States")

Creating Box Plots for Average Hate Crimes Per 100K

# Horizontal, notched box plot of the FBI average hate-crime rate for the
# lower-income states. TRUE is spelled out because `T` can be reassigned.
boxplot(lower_income$avg_hatecrimes_per_100k_fbi,
        col = "beige",
        notch = TRUE,
        horizontal = TRUE,
        main = "Average Hate Crimes Per 100K/FBI Statistics",
        xlab = "Population of Lower Income States")

# Same plot for the higher-income states.
boxplot(higher_income$avg_hatecrimes_per_100k_fbi,
        col = "beige",
        notch = TRUE,
        horizontal = TRUE,
        main = "Average Hate Crimes Per 100K/FBI Statistics",
        xlab = "Population of Higher Income States")

As per research, the Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measure of inequality.

Evaluating the Normal Distribution of the Gini Index

Gini Index

# Normal Q-Q plot of the Gini index for the lower-income states, with a
# qqline() reference line overlaid
qqnorm(
  y = lower_income[["gini_index"]],
  main = "Gini Index: Lower Population"
)
qqline(y = lower_income[["gini_index"]])

# Same pair of calls for the higher-income states
qqnorm(
  y = higher_income[["gini_index"]],
  main = "Gini Index: Higher Population"
)
qqline(y = higher_income[["gini_index"]])

Trump Voters

# Normal Q-Q plot of the Trump vote share for the lower-income states, with a
# qqline() reference line overlaid
qqnorm(
  y = lower_income[["share_voters_voted_trump"]],
  main = "Lower Income Population: Trump Voters"
)
qqline(y = lower_income[["share_voters_voted_trump"]])

# Same pair of calls for the higher-income states
qqnorm(
  y = higher_income[["share_voters_voted_trump"]],
  main = "Higher Income Population: Trump Voters"
)
qqline(y = higher_income[["share_voters_voted_trump"]])

Adults Over 25 w/High School Diploma

# Normal Q-Q plot of the high-school-degree share for the lower-income states,
# with a qqline() reference line overlaid
qqnorm(
  y = lower_income[["share_population_with_high_school_degree"]],
  main = "Lower Income Population: AdultsOver 25 w/HS Degree"
)
qqline(y = lower_income[["share_population_with_high_school_degree"]])

#Confidence Intervals

Average Hate Crimes, FBI

# Approximate 95% confidence interval for the mean FBI hate-crime rate, built
# from a random sample of 20 states.
fbi <- hate$avg_hatecrimes_per_100k_fbi
# The column contains a missing value; sample only from the observed rates so
# a drawn NA cannot turn the mean, SE and interval into NA.
samp_fbi <- sample(fbi[!is.na(fbi)], 20)
sample_mean_fbi <- mean(samp_fbi)
# Standard error uses the actual sample length rather than a repeated literal.
se <- sd(samp_fbi) / sqrt(length(samp_fbi))
# 1.96 is the normal-approximation critical value for a 95% interval.
lower <- sample_mean_fbi - 1.96 * se
upper <- sample_mean_fbi + 1.96 * se
# No seed is set, so the interval differs from run to run.
c(lower, upper)
## [1] 1.282219 3.307578

Median Household Income

# Approximate 95% confidence interval for mean median household income, built
# from a random sample of 20 states (no seed set, so results vary per run).
mhi <- hate[["median_household_income"]]
samp_mhi <- sample(x = mhi, size = 20)
sample_mean_mhi <- mean(samp_mhi)
# Standard error of the sample mean
se <- sd(samp_mhi) / sqrt(20)
# Interval endpoints: mean -/+ 1.96 standard errors
lower <- sample_mean_mhi - 1.96 * se
upper <- sample_mean_mhi + 1.96 * se
c(lower, upper)
## [1] 52110.25 59617.85

Linear Regression

The correlation between the median household income and the population of adults with a high school degree

# Pearson correlation between median household income and the share of adults
# with a high-school degree
cor(hate$median_household_income,hate$share_population_with_high_school_degree)
## [1] 0.65349
# Simple linear regression of high-school share on median household income.
# The formula uses bare column names resolved through `data = hate`; writing
# `hate$...` inside the formula bypasses `data`, gives awkward coefficient
# names, and makes predict(inc_deg, newdata = ...) ignore the new data.
inc_deg <- lm(
  share_population_with_high_school_degree ~ median_household_income,
  data = hate
)
summary(inc_deg)
## 
## Call:
## lm(formula = share_population_with_high_school_degree ~ median_household_income, 
##     data = hate)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.075845 -0.017330 -0.000675  0.018116  0.048849 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             7.356e-01  2.240e-02  32.846  < 2e-16 ***
## median_household_income 2.418e-06  4.001e-07   6.043 2.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02605 on 49 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.4154 
## F-statistic: 36.52 on 1 and 49 DF,  p-value: 2.007e-07
# Scatter plot of the two variables with the fitted regression line overlaid
plot(
  share_population_with_high_school_degree ~ median_household_income,
  data = hate
)
abline(inc_deg)