Introduction

We are working with the Voter Turnout data set for 2016 and 2018. The data come from the Census Bureau and the Bureau of Labor Statistics, which survey a sample of Americans every two years, immediately following the November election. This data set is important because it provides insight into why people did or did not vote in the election. Over the past four years we have seen the power that comes from voting. By understanding the factors that affect someone's decision to vote or not, we can encourage specific demographic groups to vote.

library(tidyverse)
## ── Attaching packages ─────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
## Survey data file (data_2016_2018.csv), read from a pinned commit of the
## challenge repository so the input cannot change underneath the analysis.
data1618 <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
  header = TRUE
)
## Quick shape check: number of rows and columns, then the column names.
dim(data1618)
## [1] 152824     28
names(data1618)
##  [1] "YEAR"       "STATEFIP"   "METRO"      "AGE"        "SEX"       
##  [6] "RACE"       "MARST"      "VETSTAT"    "CITIZEN"    "HISPAN"    
## [11] "LABFORCE"   "EDUC99"     "EDCYC"      "EDDIPGED"   "EDHGCGED"  
## [16] "SCHLCOLL"   "VOWHYNOT"   "VOYNOTREG"  "VOTEHOW"    "VOTEWHEN"  
## [21] "VOREGHOW"   "VOTED"      "VOREG"      "VOSUPPWT"   "MARRSIMPLE"
## [26] "RACESIMPLE" "HISPSIMPLE" "EDUSIMPLE"
#view(data1618)
Data Dictionary
## Data dictionary from the same pinned commit. It has the columns
## variable / value / label / meta; strings are kept as character so the
## labels can be edited and joined as plain text.
dd <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
#view(dd)
str(dd)
## 'data.frame':    309 obs. of  4 variables:
##  $ variable: chr  "YEAR" "STATEFIP" "STATEFIP" "STATEFIP" ...
##  $ value   : int  NA NA 1 2 4 5 6 8 9 10 ...
##  $ label   : chr  "Survey year" "State (FIPS code)" "Alabama" "Alaska" ...
##  $ meta    : chr  "varlab" "varlab" "" "" ...
Variable Labels
## Build a code -> label lookup table for one variable of the data dictionary.
##
## dict      : data dictionary with (at least) the columns `variable`,
##             `value` and `label`
## code_var  : name of the coded variable, e.g. "STATEFIP"; it is also used
##             as the name of the key column so the table can be joined
##             onto the survey data by that column
## label_col : name given to the human-readable label column
##
## Returns a two-column data frame (code, label). The first matching row is
## dropped: for STATEFIP it is the variable-level description row
## (meta == "varlab", value NA), and the same layout is assumed for every
## other variable.
code_lookup <- function(dict, code_var, label_col) {
  rows <- filter(dict, variable == code_var)
  lookup <- rows[-1, c("value", "label")]
  colnames(lookup) <- c(code_var, label_col)
  lookup
}

## STATE FIP CODES
STATEFIP <- code_lookup(dd, "STATEFIP", "State")

## METRO CODES
METRO <- code_lookup(dd, "METRO", "Metro")

## RACE CODES
RACE <- code_lookup(dd, "RACE", "Race")

#### We might prefer to use the simplified RACE variable
RACESIMP <- code_lookup(dd, "RACESIMPLE", "RaceSimp")

## Marital Status (MARST) CODES
## (the label column keeps its historical spelling "Martial" so that code
## relying on the column name keeps working)
MARST <- code_lookup(dd, "MARST", "Martial")

#### We might prefer to use the simplified MARST variable
MARRSIMP <- code_lookup(dd, "MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT <- code_lookup(dd, "VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN <- code_lookup(dd, "CITIZEN", "Citizen")

## HISPAN CODES
HISPAN <- code_lookup(dd, "HISPAN", "Hispanic")

#### We might prefer to use the simplified HISPAN variable
HISPSIMP <- code_lookup(dd, "HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE <- code_lookup(dd, "LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99 <- code_lookup(dd, "EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC <- code_lookup(dd, "EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED <- code_lookup(dd, "EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED <- code_lookup(dd, "EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable
EDUSIMPLE <- code_lookup(dd, "EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL <- code_lookup(dd, "SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT <- code_lookup(dd, "VOWHYNOT", "WhyNotVote")
## Replace the label in the 8th row of the lookup with a short one.
## NOTE(review): this is positional; it presumably shortens a long
## dictionary label for plotting. Confirm the row against the dictionary.
VOWHYNOT[8, 2] <- "Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG <- code_lookup(dd, "VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW <- code_lookup(dd, "VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN <- code_lookup(dd, "VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW <- code_lookup(dd, "VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED <- code_lookup(dd, "VOTED", "Voted")

## Registered for the most recent November election
VOREG <- code_lookup(dd, "VOREG", "Registered")
##### Select columns and join for labels
## Keep only the columns used in the analysis, then fold the label tables
## onto them one at a time. Each table shares a single column name with the
## data (its code column), so every natural left join adds one label column
## and reports the key it joined by.
trim1618 <- Reduce(
  left_join,
  list(
    STATEFIP, METRO, RACESIMP, MARRSIMP, VETSTAT, CITIZEN,
    HISPSIMP, LABFORCE, EDUSIMPLE, SCHLCOLL,
    VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
    VOREGHOW, VOTED, VOREG
  ),
  select(
    data1618,
    YEAR, STATEFIP, METRO, AGE, SEX,
    RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN,
    HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL,
    VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
    VOREGHOW, VOTED, VOREG, VOSUPPWT
  )
)
## Joining, by = "STATEFIP"
## Joining, by = "METRO"
## Joining, by = "RACESIMPLE"
## Joining, by = "MARRSIMPLE"
## Joining, by = "VETSTAT"
## Joining, by = "CITIZEN"
## Joining, by = "HISPSIMPLE"
## Joining, by = "LABFORCE"
## Joining, by = "EDUSIMPLE"
## Joining, by = "SCHLCOLL"
## Joining, by = "VOWHYNOT"
## Joining, by = "VOYNOTREG"
## Joining, by = "VOTEHOW"
## Joining, by = "VOTEWHEN"
## Joining, by = "VOREGHOW"
## Joining, by = "VOTED"
## Joining, by = "VOREG"

Why don’t People Vote?

## Count respondents per survey year and stated reason for not voting.
## Rows labelled "NIU" are excluded (filter() also drops rows whose label
## is NA). The result stays grouped by YEAR, as summarise() reports.
sumWhy <- summarise(
  group_by(
    filter(trim1618, WhyNotVote != "NIU"),
    YEAR, WhyNotVote
  ),
  n = n()
)
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
## Horizontal dodged bars: one bar per reason and survey year, with the
## reasons ordered by their counts.
ggplot(
  data = sumWhy,
  mapping = aes(x = reorder(WhyNotVote, n), y = n, fill = as.factor(YEAR))
) +
  geom_bar(stat = "identity", position = "dodge2") +
  #facet_grid(.~YEAR)+
  coord_flip() +
  #theme(legend.position = "none")+
  theme(
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(title = "Why don't people vote?")

#### Education and Voter Turn Out

## Education mix within each voting status, as proportions (bars filled to
## 1), with one panel per survey year.
ggplot(data = trim1618, mapping = aes(x = Voted, fill = EduSimp)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(YEAR))

We can see that people with more education voted more in both 2016 and 2018.

Education and When People Voted

## Education mix within each "when did you vote" answer, as proportions
## (bars filled to 1), with one panel per survey year.
ggplot(data = trim1618, mapping = aes(x = VoteWhen, fill = EduSimp)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(YEAR))

This graph shows when people voted compared with their education level. In 2018, it shows that people with a bachelor's degree did not respond. Also, in 2016 some people with an associate degree did not respond, but in 2018 there were no non-respondents with an associate degree.

Age and Voter Turn Out

## Age distribution as one boxplot per voting label (boxes coloured by the
## label).
ggplot(
  data = trim1618,
  mapping = aes(x = AGE, fill = Voted)
) +
  geom_boxplot()

## Logistic regression (binomial family) of the voting label on age,
## followed by the coefficient table and fit statistics.
## NOTE(review): with a factor response, glm() treats the first factor level
## as "failure" and every other level as "success". If Voted has more than
## two labels (e.g. non-response or "NIU" categories, not visible here),
## they are all counted as "success"; confirm with
## levels(as.factor(trim1618$Voted)) before interpreting the AGE coefficient.
m1 <- glm(as.factor(Voted) ~ AGE, data = trim1618, family = "binomial")
summary(m1)
## 
## Call:
## glm(formula = as.factor(Voted) ~ AGE, family = "binomial", data = trim1618)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9493  -1.2968   0.7173   0.8747   1.1328  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.3325070  0.0161454  -20.59   <2e-16 ***
## AGE          0.0243576  0.0003231   75.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186890  on 152823  degrees of freedom
## Residual deviance: 180929  on 152822  degrees of freedom
## AIC: 180933
## 
## Number of Fisher Scoring iterations: 4

The boxplot and the logistic regression model both show that older people tend to vote more. This is useful because it suggests we should reach out to the younger generations and encourage them to vote.