Download the data

I uploaded the data to my GitHub:

library(tidyverse)

### The 2016 and 2018 datasets, already combined into a single file
data1618 <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
  header = TRUE
)
# dim(data1618)
# names(data1618)

## Data dictionary; text columns are read as character rather than factor
dd <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

Variable Labels

Label dataframes:

## Build the code -> label lookup table for one variable of the data
## dictionary.
##
## dict      : data dictionary data frame; must contain a `variable` column
## varName   : value of `variable` whose rows are kept. It is also used as the
##             name of the code column, so the table joins to the survey data
##             on the column of the same name.
## labelName : name given to the label column
##
## Returns columns 2:3 of the matching rows, minus the first matching row.
## NOTE(review): the first row is presumably the variable-level description
## rather than a code -- confirm against data_dictionary.csv.
codeLabels <- function(dict, varName, labelName) {
  # `!!` injects the value of varName so it can never be mistaken for a
  # column of `dict`
  rows <- dict %>%
    filter(variable == !!varName)

  labels <- rows[-1, 2:3]
  colnames(labels) <- c(varName, labelName)
  labels
}

## STATE FIP CODES
STATEFIP <- codeLabels(dd, "STATEFIP", "State")

## METRO CODES
METRO <- codeLabels(dd, "METRO", "Metro")

## RACE CODES
RACE <- codeLabels(dd, "RACE", "Race")

#### We might prefer to use the simplified RACE variable
RACESIMP <- codeLabels(dd, "RACESIMPLE", "RaceSimp")

## Marital Status (MARST) CODES
## (the label column keeps its original spelling, "Martial", so existing
## references to that column name do not break)
MARST <- codeLabels(dd, "MARST", "Martial")

#### We might prefer to use the simplified MARST variable
MARRSIMP <- codeLabels(dd, "MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT <- codeLabels(dd, "VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN <- codeLabels(dd, "CITIZEN", "Citizen")

## HISPAN CODES
HISPAN <- codeLabels(dd, "HISPAN", "Hispanic")

#### We might prefer to use the simplified HISPAN variable
HISPSIMP <- codeLabels(dd, "HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE <- codeLabels(dd, "LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99 <- codeLabels(dd, "EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC <- codeLabels(dd, "EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED <- codeLabels(dd, "EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED <- codeLabels(dd, "EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable
EDUSIMPLE <- codeLabels(dd, "EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL <- codeLabels(dd, "SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT <- codeLabels(dd, "VOWHYNOT", "WhyNotVote")
## Overwrite the label in row 8 with a shorter one.
## NOTE(review): this is a positional index -- confirm row 8 is still the
## registration-problems reason if the dictionary ever changes.
VOWHYNOT[8, 2] <- "Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG <- codeLabels(dd, "VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW <- codeLabels(dd, "VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN <- codeLabels(dd, "VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW <- codeLabels(dd, "VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED <- codeLabels(dd, "VOTED", "Voted")

## Registered for the most recent November election
VOREG <- codeLabels(dd, "VOREG", "Registered")

Join Labels

##### Select Columns and join for labels
## Keep the analysis columns, then append one readable label column per coded
## variable. Each join names its key explicitly instead of letting dplyr infer
## it from whichever column names the two tables happen to share, so a stray
## shared column can never silently become part of the key (and no
## "Joining, by = ..." messages are printed).
trim1618 <- data1618 %>%
  select(
    YEAR, STATEFIP, METRO, AGE, SEX,
    RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN,
    HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL,
    VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
    VOREGHOW, VOTED, VOREG, VOSUPPWT
  ) %>%
  left_join(STATEFIP, by = "STATEFIP") %>%
  left_join(METRO, by = "METRO") %>%
  left_join(RACESIMP, by = "RACESIMPLE") %>%
  left_join(MARRSIMP, by = "MARRSIMPLE") %>%
  left_join(VETSTAT, by = "VETSTAT") %>%
  left_join(CITIZEN, by = "CITIZEN") %>%
  left_join(HISPSIMP, by = "HISPSIMPLE") %>%
  left_join(LABFORCE, by = "LABFORCE") %>%
  left_join(EDUSIMPLE, by = "EDUSIMPLE") %>%
  left_join(SCHLCOLL, by = "SCHLCOLL") %>%
  left_join(VOWHYNOT, by = "VOWHYNOT") %>%
  left_join(VOYNOTREG, by = "VOYNOTREG") %>%
  left_join(VOTEHOW, by = "VOTEHOW") %>%
  left_join(VOTEWHEN, by = "VOTEWHEN") %>%
  left_join(VOREGHOW, by = "VOREGHOW") %>%
  left_join(VOTED, by = "VOTED") %>%
  left_join(VOREG, by = "VOREG")

Some Data Viz

Race and Voter Turn-out

# RACE and VOTED
## Within each voting status, the share of respondents in each race group;
## one panel per year
trim1618 %>%
  ggplot(aes(x = Voted, fill = RaceSimp)) +
  geom_bar(position = "fill") +
  facet_grid(. ~ YEAR)

## The same breakdown as stacked raw counts
trim1618 %>%
  ggplot(aes(x = Voted, fill = RaceSimp)) +
  geom_bar() +
  facet_grid(. ~ YEAR)

Education and Voter Turn-out

# The higher the education the more likely to vote
## Share of each voting status per year; panels are keyed by the EDUSIMPLE
## code (not by the EduSimp label)
trim1618 %>%
  ggplot(aes(x = YEAR, fill = Voted)) +
  geom_bar(position = "fill") +
  facet_grid(. ~ EDUSIMPLE)

Marital Status and Voter Turn-out

## MARRIED
# more likely to vote if married
## Share of each voting status per year; panels are keyed by the MARRSIMPLE
## code (not by the MartialSimp label)
trim1618 %>%
  ggplot(aes(x = YEAR, fill = Voted)) +
  geom_bar(position = "fill") +
  facet_grid(. ~ MARRSIMPLE)

Reasons why people don't vote

## REASONS FOR NOT VOTING
## Number of respondents per year and stated reason. The filter removes the
## "NIU" label; rows whose WhyNotVote is NA are removed as well, because
## filter() keeps only rows where the condition is TRUE.
## `.groups = "drop_last"` spells out the regrouping summarise() was already
## doing (result stays grouped by YEAR) and silences its message.
sumWhy <- trim1618 %>%
  filter(WhyNotVote != "NIU") %>%
  group_by(YEAR, WhyNotVote) %>%
  summarise(n = n(), .groups = "drop_last")

## Horizontal bars, one bar per year for each reason, reasons ordered by count
ggplot(
  sumWhy,
  aes(x = reorder(WhyNotVote, n), y = n, fill = as.factor(YEAR))
) +
  geom_bar(stat = "identity", position = "dodge2") +
  #facet_grid(.~YEAR)+
  coord_flip() +
  #theme(legend.position = "none")+
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank()) +
  # without this the legend title is the raw expression "as.factor(YEAR)"
  labs(fill = "Year") +
  ggtitle("Why don't people vote?")

Maps

Voter turn-out by state:

#install.packages("usmap")
library(usmap)
## Warning: package 'usmap' was built under R version 3.6.2
#install.packages("plotly")
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#install.packages("viridis")
library(viridis)
## Loading required package: viridisLite
## State outlines for plotting.
## NOTE(review): the code below relies on this table having `x`, `y`, `group`
## and `full` columns -- confirm the installed usmap version provides them.
states <- usmap::us_map()

## Per year, state and voting status: number of respondents and summed
## VOSUPPWT (presumably the survey weight -- confirm in the data dictionary).
## `.groups = "drop_last"` makes explicit the grouping summarise() was already
## returning (YEAR, State) and silences its message.
state18 <- trim1618 %>%
  group_by(YEAR, State, Voted) %>%
  summarise(nVote = n(),
            nWgtVote = sum(VOSUPPWT, na.rm = TRUE),
            .groups = "drop_last")
#head(state18)

## Per year and state totals (the denominators); result stays grouped by YEAR
state18T <- trim1618 %>%
  group_by(YEAR, State) %>%
  summarise(n = n(),
            nWgt = sum(VOSUPPWT, na.rm = TRUE),
            .groups = "drop_last")

## Proportion of respondents labelled "Voted" per year and state, unweighted
## (sampPropVote) and weighted (wgtPropVote). Join keys are named explicitly
## rather than inferred from shared column names.
statePropVote <- state18 %>%
  filter(Voted == "Voted") %>%
  left_join(state18T, by = c("YEAR", "State")) %>%
  mutate(sampPropVote = nVote / n,
         wgtPropVote = nWgtVote / nWgt)
#hist(statePropVote$wgtPropVote)

## Attach the turnout figures to the map outlines, matching on full state name
mapPropVote <- states %>%
  mutate(State = full) %>%
  left_join(statePropVote, by = "State")
## Year shown in the single-year map
this.year <- 2016

## Map of weighted turnout for `this.year`, one polygon group per state
p <- mapPropVote %>%
  filter(YEAR == this.year) %>%
  ggplot(aes(x, y, group = group)) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); presumably it
  # is there so plotly can show the state name on hover
  geom_polygon(aes(text = State, fill = wgtPropVote), color = "black") +
  theme_bw() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x = "", y = "",
       caption = "(Based on data from IPUMS)",
       # wgtPropVote is a 0-1 proportion, not a percentage
       fill = "Proportion",
       # paste() already inserts a space between its arguments
       title = paste(this.year, "Voter Turn-out")) +
  scale_fill_viridis(option = "magma", direction = -1)
## Warning: Ignoring unknown aesthetics: text
ggplotly(p)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Compare voter turn-out across 2016 and 2018

#Facet to maintain comparisons
## Both years side by side on one shared fill scale, so colours are directly
## comparable between panels

p <- mapPropVote %>%
  ggplot(aes(x, y, group = group)) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); presumably it
  # is there so plotly can show the state name on hover
  geom_polygon(aes(text = State, fill = wgtPropVote), color = "black") +
  theme_bw() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x = "", y = "",
       caption = "(Based on data from IPUMS)",
       # wgtPropVote is a 0-1 proportion, not a percentage
       fill = "Proportion",
       title = "Voter Turn-out is Higher in Presidential Election Years") +
  facet_grid(. ~ YEAR) +
  scale_fill_viridis(option = "magma", direction = -1)
## Warning: Ignoring unknown aesthetics: text
ggplotly(p)
### Slightly different colors
## Same faceted map, drawn with the default viridis palette instead of "magma"

p <- mapPropVote %>%
  ggplot(aes(x, y, group = group)) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); presumably it
  # is there so plotly can show the state name on hover
  geom_polygon(aes(text = State, fill = wgtPropVote), color = "black") +
  theme_bw() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x = "", y = "",
       caption = "(Based on data from IPUMS)",
       # wgtPropVote is a 0-1 proportion, not a percentage
       fill = "Proportion",
       title = "Voter Turn-out is Higher in Presidential Election Years") +
  facet_grid(. ~ YEAR) +
  scale_fill_viridis(direction = -1)
## Warning: Ignoring unknown aesthetics: text
ggplotly(p)

Methods of voting: Mail-in vs In-person