We are working with the Voter Turnout data set covering the 2016 and 2018 elections. We obtained the data from the Census Bureau and the Bureau of Labor Statistics, which collect it from a sample of Americans every two years, immediately following the November election. This data set is important because it provides details into why people did or did not vote in the election. Over the past four years we have seen the power that comes from voting. By understanding the factors that influence someone's decision to vote or not, we can encourage specific demographic groups to vote.
library(tidyverse)
## ── Attaching packages ─────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Load the 2016/2018 voter-turnout extract. The URL includes a commit hash, so
# the same file is read on every run.
# stringsAsFactors = FALSE matches how the data dictionary is read and keeps
# any text columns as character (not factor) on R versions before 4.0.
data1618 <- read.csv(
  "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Quick sanity checks: number of rows/columns, then the column names.
dim(data1618)
## [1] 152824 28
names(data1618)
## [1] "YEAR" "STATEFIP" "METRO" "AGE" "SEX"
## [6] "RACE" "MARST" "VETSTAT" "CITIZEN" "HISPAN"
## [11] "LABFORCE" "EDUC99" "EDCYC" "EDDIPGED" "EDHGCGED"
## [16] "SCHLCOLL" "VOWHYNOT" "VOYNOTREG" "VOTEHOW" "VOTEWHEN"
## [21] "VOREGHOW" "VOTED" "VOREG" "VOSUPPWT" "MARRSIMPLE"
## [26] "RACESIMPLE" "HISPSIMPLE" "EDUSIMPLE"
#view(data1618)
# Data dictionary: each row pairs a variable name and a numeric code (`value`)
# with its text `label`. Rows whose `meta` is "varlab" appear to describe the
# variable itself rather than one of its codes.
dd <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
#view(dd)
# Show the column types of the dictionary.
str(dd)
## 'data.frame': 309 obs. of 4 variables:
## $ variable: chr "YEAR" "STATEFIP" "STATEFIP" "STATEFIP" ...
## $ value : int NA NA 1 2 4 5 6 8 9 10 ...
## $ label : chr "Survey year" "State (FIPS code)" "Alabama" "Alaska" ...
## $ meta : chr "varlab" "varlab" "" "" ...
## Build a code -> label lookup table for one coded variable.
##
## Arguments:
##   code_var    name of the variable in the dictionary's `variable` column;
##               also used as the name of the code column in the result, so
##               the table can be joined to the survey data on that column.
##   label_name  name to give the text-label column in the result.
##   dictionary  data dictionary to read from (defaults to the global `dd`).
##
## Returns a two-column data frame: the numeric codes (dictionary column 2,
## `value`) and their labels (dictionary column 3, `label`).
build_lookup <- function(code_var, label_name, dictionary = dd) {
  lookup <- dictionary %>%
    filter(variable == code_var)
  # Drop the first matching row. For STATEFIP this is the "varlab" row that
  # describes the variable itself; assumes the same layout for every
  # variable -- TODO confirm against the dictionary.
  lookup <- lookup[-1, 2:3]
  colnames(lookup) <- c(code_var, label_name)
  lookup
}

## STATE FIP CODES
STATEFIP <- build_lookup("STATEFIP", "State")

## METRO CODES
METRO <- build_lookup("METRO", "Metro")

## RACE CODES
RACE <- build_lookup("RACE", "Race")
#### We might prefer to use the simplified RACE variable
RACESIMP <- build_lookup("RACESIMPLE", "RaceSimp")

## Marital Status (MARST) CODES
## The label columns keep their original spelling ("Martial", "MartialSimp")
## so that any code referring to those column names keeps working.
MARST <- build_lookup("MARST", "Martial")
#### We might prefer to use the simplified MARST variable
MARRSIMP <- build_lookup("MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT <- build_lookup("VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN <- build_lookup("CITIZEN", "Citizen")

## HISPAN CODES
HISPAN <- build_lookup("HISPAN", "Hispanic")
#### We might prefer to use the simplified HISPAN variable
HISPSIMP <- build_lookup("HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE <- build_lookup("LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99 <- build_lookup("EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC <- build_lookup("EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED <- build_lookup("EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED <- build_lookup("EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable
EDUSIMPLE <- build_lookup("EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL <- build_lookup("SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT <- build_lookup("VOWHYNOT", "WhyNotVote")
# Overwrite the label in the 8th row of the lookup with a shorter text.
# NOTE(review): this is positional -- verify that row 8 is still the
# registration-problems code if the dictionary file ever changes.
VOWHYNOT[8, 2] <- "Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG <- build_lookup("VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW <- build_lookup("VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN <- build_lookup("VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW <- build_lookup("VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED <- build_lookup("VOTED", "Voted")

## Registered for the most recent November election
VOREG <- build_lookup("VOREG", "Registered")
##### Select Columns and join for labels
# Keep the columns used in the analysis, then attach a text-label column for
# each coded variable from its lookup table.
# Every join names its key explicitly: an implicit (natural) join matches on
# ALL shared column names, so an accidental name collision with a label column
# would silently change the key. Explicit keys also mean dplyr no longer
# prints a "Joining, by = ..." message for each join.
trim1618 <- data1618 %>%
  select(
    YEAR, STATEFIP, METRO, AGE, SEX,
    RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN,
    HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL,
    VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
    VOREGHOW, VOTED, VOREG, VOSUPPWT
  ) %>%
  left_join(STATEFIP, by = "STATEFIP") %>%
  left_join(METRO, by = "METRO") %>%
  left_join(RACESIMP, by = "RACESIMPLE") %>%
  left_join(MARRSIMP, by = "MARRSIMPLE") %>%
  left_join(VETSTAT, by = "VETSTAT") %>%
  left_join(CITIZEN, by = "CITIZEN") %>%
  left_join(HISPSIMP, by = "HISPSIMPLE") %>%
  left_join(LABFORCE, by = "LABFORCE") %>%
  left_join(EDUSIMPLE, by = "EDUSIMPLE") %>%
  left_join(SCHLCOLL, by = "SCHLCOLL") %>%
  left_join(VOWHYNOT, by = "VOWHYNOT") %>%
  left_join(VOYNOTREG, by = "VOYNOTREG") %>%
  left_join(VOTEHOW, by = "VOTEHOW") %>%
  left_join(VOTEWHEN, by = "VOTEWHEN") %>%
  left_join(VOREGHOW, by = "VOREGHOW") %>%
  left_join(VOTED, by = "VOTED") %>%
  left_join(VOREG, by = "VOREG")
## Joining, by = "STATEFIP"
## Joining, by = "METRO"
## Joining, by = "RACESIMPLE"
## Joining, by = "MARRSIMPLE"
## Joining, by = "VETSTAT"
## Joining, by = "CITIZEN"
## Joining, by = "HISPSIMPLE"
## Joining, by = "LABFORCE"
## Joining, by = "EDUSIMPLE"
## Joining, by = "SCHLCOLL"
## Joining, by = "VOWHYNOT"
## Joining, by = "VOYNOTREG"
## Joining, by = "VOTEHOW"
## Joining, by = "VOTEWHEN"
## Joining, by = "VOREGHOW"
## Joining, by = "VOTED"
## Joining, by = "VOREG"
# Count respondents per survey year and stated reason for not voting.
# Rows labelled "NIU" are excluded (presumably "not in universe" -- confirm
# against the data dictionary); filter() also drops rows where the label is NA.
# ungroup() returns a plain, ungrouped table so that later verbs applied to
# sumWhy are not silently computed per YEAR.
sumWhy <- trim1618 %>%
  filter(WhyNotVote != "NIU") %>%
  group_by(YEAR, WhyNotVote) %>%
  summarise(n = n()) %>%
  ungroup()
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
# Horizontal bar chart of the reasons for not voting, with one bar per survey
# year side by side. Reasons are ordered by their count; the y-axis title and
# tick marks are removed because the reason labels speak for themselves.
ggplot(
  data = sumWhy,
  mapping = aes(x = reorder(WhyNotVote, n), y = n, fill = as.factor(YEAR))
) +
  geom_col(position = "dodge2") +
  #facet_grid(.~YEAR)+
  coord_flip() +
  #theme(legend.position = "none")+
  theme(
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(title = "Why don't people vote?")
#### Education and Voter Turn Out
# One bar per Voted label, split by education level, one panel per year.
# position = "fill" stretches every bar to full height, so segments show the
# share of each education level within that bar rather than raw counts.
# NOTE(review): rows are counted unweighted; VOSUPPWT is not applied here.
ggplot(data = trim1618, mapping = aes(x = Voted, fill = EduSimp)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(YEAR))
We can see that people with higher levels of education voted more in both 2016 and 2018.
# One bar per VoteWhen label, split by education level, one panel per year.
# position = "fill" stretches every bar to full height, so segments show the
# share of each education level within that bar rather than raw counts.
# NOTE(review): rows are counted unweighted; VOSUPPWT is not applied here.
ggplot(data = trim1618, mapping = aes(x = VoteWhen, fill = EduSimp)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(YEAR))
This graph shows when people voted, broken down by education level. In 2018, it shows that people with a Bachelor's degree did not respond. Also, in 2016 some people with an Associate degree did not respond, but in 2018 there are no non-respondents with an Associate degree.
# Horizontal boxplots of respondent age, one box per Voted label
# (distinguished by fill colour).
ggplot(
  data = trim1618,
  mapping = aes(x = AGE, fill = Voted)
) +
  geom_boxplot()
# Logistic regression (binomial family) of the Voted label on age.
# NOTE(review): with a factor response, a binomial glm treats the first factor
# level as "failure" and every other level as "success". If Voted has more than
# two labels (e.g. non-response categories), they are lumped together with the
# non-first levels -- check levels(as.factor(trim1618$Voted)) before
# interpreting the coefficients.
m1 <- glm(
  formula = as.factor(Voted) ~ AGE,
  family = "binomial",
  data = trim1618
)
summary(m1)
##
## Call:
## glm(formula = as.factor(Voted) ~ AGE, family = "binomial", data = trim1618)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9493 -1.2968 0.7173 0.8747 1.1328
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3325070 0.0161454 -20.59 <2e-16 ***
## AGE 0.0243576 0.0003231 75.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 186890 on 152823 degrees of freedom
## Residual deviance: 180929 on 152822 degrees of freedom
## AIC: 180933
##
## Number of Fisher Scoring iterations: 4
From the boxplot and the logistic regression model, we can see that older people tend to vote more (the AGE coefficient is positive and highly significant). This is interesting because, based on this data, we can reach out to younger generations and encourage them to vote.