library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the labeled voter survey extract (path is relative to the working directory).
Abbreviated_Voter_Dataset_Labeled <- read_csv(
  file = "Downloads/Abbreviated Voter Dataset Labeled.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character(),
## NumChildren = col_double(),
## Immigr_Economy_GiveTake = col_double(),
## ft_fem_2017 = col_double(),
## ft_immig_2017 = col_double(),
## ft_police_2017 = col_double(),
## ft_dem_2017 = col_double(),
## ft_rep_2017 = col_double(),
## ft_evang_2017 = col_double(),
## ft_muslim_2017 = col_double(),
## ft_jew_2017 = col_double(),
## ft_christ_2017 = col_double(),
## ft_gays_2017 = col_double(),
## ft_unions_2017 = col_double(),
## ft_altright_2017 = col_double(),
## ft_black_2017 = col_double(),
## ft_white_2017 = col_double(),
## ft_hisp_2017 = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
I hypothesize that there is a relationship between race and education. This is important to study because, if educational attainment is unequal across racial groups, then outcomes that are driven by education will be unequal as well.
My independent variable (IV) is race and my dependent variable (DV) is education.
# Share of respondents in each race category (distribution of the IV).
# A one-way table has a single margin, so the proportions must be taken over
# the whole table: passing margin = 1 divides every cell by itself and reports
# 1 for every category. Calling prop.table() without a margin divides by the
# table total instead.
# The output below was recomputed from the category counts reported by
# table() for race (total n = 7983).
table(Abbreviated_Voter_Dataset_Labeled$race) %>%
  prop.table()
##
## Asian Black HIspanic
## 0.015031943 0.084304146 0.051108606
## Middle Eastern Mixed Native American Mixed
## 0.001377928 0.020794188 0.007641238
## Other White
## 0.017787799 0.801954153
# Share of respondents at each education level (distribution of the DV).
# As with any one-way table, the proportions are taken over the table total:
# margin = 1 would divide each cell by itself and report 1 everywhere.
# The output below was recomputed from the category counts reported by
# table() for education (total n = 7983).
table(Abbreviated_Voter_Dataset_Labeled$education) %>%
  prop.table()
##
## 2-year 4-year High School Graduate
## 0.09820869 0.24702493 0.24765126
## No High School Post Grad Some College
## 0.02079419 0.15157209 0.23474884
# Raw number of respondents in each race category.
table(Abbreviated_Voter_Dataset_Labeled[["race"]])
##
## Asian Black HIspanic
## 120 673 408
## Middle Eastern Mixed Native American Mixed
## 11 166 61
## Other White
## 142 6402
# Raw number of respondents at each education level.
table(Abbreviated_Voter_Dataset_Labeled[["education"]])
##
## 2-year 4-year High School Graduate
## 784 1972 1977
## No High School Post Grad Some College
## 166 1210 1874
# Cross-tabulation of counts: race in rows, education level in columns.
table(
  Abbreviated_Voter_Dataset_Labeled[["race"]],
  Abbreviated_Voter_Dataset_Labeled[["education"]]
)
##
## 2-year 4-year High School Graduate No High School
## Asian 11 60 7 0
## Black 93 159 125 9
## HIspanic 54 101 74 7
## Middle Eastern 0 5 0 0
## Mixed 19 45 25 4
## Native American Mixed 14 12 10 1
## Other 15 33 17 3
## White 577 1553 1717 142
##
## Post Grad Some College
## Asian 26 16
## Black 76 210
## HIspanic 59 112
## Middle Eastern 5 1
## Mixed 26 47
## Native American Mixed 7 17
## Other 30 44
## White 978 1422
# Education distribution within each race: margin = 1 normalises every row of
# the race-by-education table so that it sums to 1.
prop.table(
  table(
    Abbreviated_Voter_Dataset_Labeled[["race"]],
    Abbreviated_Voter_Dataset_Labeled[["education"]]
  ),
  margin = 1
)
##
## 2-year 4-year High School Graduate
## Asian 0.09166667 0.50000000 0.05833333
## Black 0.13839286 0.23660714 0.18601190
## HIspanic 0.13267813 0.24815725 0.18181818
## Middle Eastern 0.00000000 0.45454545 0.00000000
## Mixed 0.11445783 0.27108434 0.15060241
## Native American Mixed 0.22950820 0.19672131 0.16393443
## Other 0.10563380 0.23239437 0.11971831
## White 0.09031147 0.24307403 0.26874315
##
## No High School Post Grad Some College
## Asian 0.00000000 0.21666667 0.13333333
## Black 0.01339286 0.11309524 0.31250000
## HIspanic 0.01719902 0.14496314 0.27518428
## Middle Eastern 0.00000000 0.45454545 0.09090909
## Mixed 0.02409639 0.15662651 0.28313253
## Native American Mixed 0.01639344 0.11475410 0.27868852
## Other 0.02112676 0.21126761 0.30985915
## White 0.02222570 0.15307560 0.22257004
# Stacked bar chart of the education distribution within each race group.
# Rows missing either variable are dropped first so the bars are built from
# the same respondents as the cross-tabulation and the chi-square test, both
# of which exclude NA values by default.
Abbreviated_Voter_Dataset_Labeled %>%
  filter(!is.na(race), !is.na(education)) %>%
  group_by(race, education) %>%
  # Keep the result grouped by race so the shares computed next sum to 1
  # within each race; stating .groups explicitly also avoids the summarise()
  # regrouping message.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = race, y = percent, fill = education))
###### Chi-Square Statistical Test
# Pearson chi-square test of independence between race (IV) and education (DV).
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect. The cross-tabulation contains very sparse cells (e.g. the
# Middle Eastern row has several zero counts), so consider merging sparse
# categories or re-running with simulate.p.value = TRUE before relying on the
# reported p-value.
chisq.test(Abbreviated_Voter_Dataset_Labeled $ race,Abbreviated_Voter_Dataset_Labeled $ education )
## Warning in chisq.test(Abbreviated_Voter_Dataset_Labeled$race,
## Abbreviated_Voter_Dataset_Labeled$education): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: Abbreviated_Voter_Dataset_Labeled$race and Abbreviated_Voter_Dataset_Labeled$education
## X-squared = 195.82, df = 35, p-value < 2.2e-16