library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the labeled voter survey extract into a tibble. The path is relative,
# so it is resolved against the current working directory.
Abbreviated_Voter_Dataset_Labeled <- read_csv(
  file = "Downloads/Abbreviated Voter Dataset Labeled.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   NumChildren = col_double(),
##   Immigr_Economy_GiveTake = col_double(),
##   ft_fem_2017 = col_double(),
##   ft_immig_2017 = col_double(),
##   ft_police_2017 = col_double(),
##   ft_dem_2017 = col_double(),
##   ft_rep_2017 = col_double(),
##   ft_evang_2017 = col_double(),
##   ft_muslim_2017 = col_double(),
##   ft_jew_2017 = col_double(),
##   ft_christ_2017 = col_double(),
##   ft_gays_2017 = col_double(),
##   ft_unions_2017 = col_double(),
##   ft_altright_2017 = col_double(),
##   ft_black_2017 = col_double(),
##   ft_white_2017 = col_double(),
##   ft_hisp_2017 = col_double()
## )
## ℹ Use `spec()` for the full column specifications.

Introduction

IV & DV Response Summary

# Share of respondents in each race category.
# A one-way table has no row margin to condition on: with margin = 1 every
# cell is divided by itself and all proportions come out as 1. Calling
# prop.table() without a margin divides by the grand total instead.
# NOTE: the captured output printed below this call predates this fix and
# must be regenerated by re-knitting the report.
table(Abbreviated_Voter_Dataset_Labeled$race) %>%
  prop.table()
## 
##                 Asian                 Black              HIspanic 
##                     1                     1                     1 
##        Middle Eastern                 Mixed Native American Mixed 
##                     1                     1                     1 
##                 Other                 White 
##                     1                     1
# Share of respondents at each education level.
# A one-way table has no row margin to condition on: with margin = 1 every
# cell is divided by itself and all proportions come out as 1. Calling
# prop.table() without a margin divides by the grand total instead.
# NOTE: the captured output printed below this call predates this fix and
# must be regenerated by re-knitting the report.
table(Abbreviated_Voter_Dataset_Labeled$education) %>%
  prop.table()
## 
##               2-year               4-year High School Graduate 
##                    1                    1                    1 
##       No High School            Post Grad         Some College 
##                    1                    1                    1

Expected Values

# One-way frequency table: number of respondents in each race category.
table(Abbreviated_Voter_Dataset_Labeled[["race"]])
## 
##                 Asian                 Black              HIspanic 
##                   120                   673                   408 
##        Middle Eastern                 Mixed Native American Mixed 
##                    11                   166                    61 
##                 Other                 White 
##                   142                  6402
# One-way frequency table: number of respondents at each education level.
table(Abbreviated_Voter_Dataset_Labeled[["education"]])
## 
##               2-year               4-year High School Graduate 
##                  784                 1972                 1977 
##       No High School            Post Grad         Some College 
##                  166                 1210                 1874

Observed Values

# Two-way contingency table of observed counts: race in the rows,
# education level in the columns.
table(
  Abbreviated_Voter_Dataset_Labeled[["race"]],
  Abbreviated_Voter_Dataset_Labeled[["education"]]
)
##                        
##                         2-year 4-year High School Graduate No High School
##   Asian                     11     60                    7              0
##   Black                     93    159                  125              9
##   HIspanic                  54    101                   74              7
##   Middle Eastern             0      5                    0              0
##   Mixed                     19     45                   25              4
##   Native American Mixed     14     12                   10              1
##   Other                     15     33                   17              3
##   White                    577   1553                 1717            142
##                        
##                         Post Grad Some College
##   Asian                        26           16
##   Black                        76          210
##   HIspanic                     59          112
##   Middle Eastern                5            1
##   Mixed                        26           47
##   Native American Mixed         7           17
##   Other                        30           44
##   White                       978         1422
  • The expected values were higher than the observed values.
Calculating Row Percentages
# Row proportions of the race x education table: with margin = 1 each cell
# is divided by its row (race) total, so every row sums to 1.
prop.table(
  table(
    Abbreviated_Voter_Dataset_Labeled$race,
    Abbreviated_Voter_Dataset_Labeled$education
  ),
  margin = 1
)
##                        
##                             2-year     4-year High School Graduate
##   Asian                 0.09166667 0.50000000           0.05833333
##   Black                 0.13839286 0.23660714           0.18601190
##   HIspanic              0.13267813 0.24815725           0.18181818
##   Middle Eastern        0.00000000 0.45454545           0.00000000
##   Mixed                 0.11445783 0.27108434           0.15060241
##   Native American Mixed 0.22950820 0.19672131           0.16393443
##   Other                 0.10563380 0.23239437           0.11971831
##   White                 0.09031147 0.24307403           0.26874315
##                        
##                         No High School  Post Grad Some College
##   Asian                     0.00000000 0.21666667   0.13333333
##   Black                     0.01339286 0.11309524   0.31250000
##   HIspanic                  0.01719902 0.14496314   0.27518428
##   Middle Eastern            0.00000000 0.45454545   0.09090909
##   Mixed                     0.02409639 0.15662651   0.28313253
##   Native American Mixed     0.01639344 0.11475410   0.27868852
##   Other                     0.02112676 0.21126761   0.30985915
##   White                     0.02222570 0.15307560   0.22257004
 # Stacked bar chart of the education mix within each race category.
 # `.groups = "drop_last"` keeps the summary grouped by race only, so sum(n)
 # in mutate() is the per-race total and each bar stacks to 1. Stating it
 # explicitly (instead of relying on summarize()'s default regrouping) also
 # silences the "grouped output by 'race'" message. The data is ungrouped
 # again before plotting so no grouping leaks into later steps.
 Abbreviated_Voter_Dataset_Labeled %>%
   group_by(race, education) %>%
   summarize(n = n(), .groups = "drop_last") %>%
   mutate(percent = n / sum(n)) %>%
   ungroup() %>%
   ggplot() +
   geom_col(aes(x = race, y = percent, fill = education))
## `summarise()` has grouped output by 'race'. You can override using the `.groups` argument.

###### Chi-Square Statistical Test

# Pearson chi-squared test of independence between race and education.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table, and the crosstab contains very sparse cells (including
# zero counts). Consider simulate.p.value = TRUE or collapsing rare
# categories, then confirm that the conclusion still holds.
chisq.test(
  Abbreviated_Voter_Dataset_Labeled$race,
  Abbreviated_Voter_Dataset_Labeled$education
)
## Warning in chisq.test(Abbreviated_Voter_Dataset_Labeled$race,
## Abbreviated_Voter_Dataset_Labeled$education): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Abbreviated_Voter_Dataset_Labeled$race and Abbreviated_Voter_Dataset_Labeled$education
## X-squared = 195.82, df = 35, p-value < 2.2e-16
  • The results of the chi-square test support my initial hypothesis. There is a statistically significant relationship between race and education because the p-value is less than .05; the two variables are not independent of one another.