FALL 2020 Data Challenge: Voter Turn-out

Download the data

I uploaded the data to my github:

library(tidyverse)

### this just puts the 2016 and 2018 datasets together
data1618<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
                   header=TRUE)
#dim(data1618)
#names(data1618)

## data dictionary
dd<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
             header=TRUE, 
             stringsAsFactors = FALSE)

Variable Labels

Label dataframes:

## STATE FIP CODES
STATEFIP<-dd%>%
  filter(variable=="STATEFIP")

STATEFIP<-STATEFIP[-1, 2:3]
colnames(STATEFIP)<-c("STATEFIP", "State")

## METRO CODES
METRO<-dd%>%
  filter(variable=="METRO")

METRO<-METRO[-1, 2:3]
colnames(METRO)<-c("METRO", "Metro")

## RACE CODES
RACE<-dd%>%
  filter(variable=="RACE")

RACE<-RACE[-1, 2:3]
colnames(RACE)<-c("RACE", "Race")

#### We might prefer to use the simplified RACE variable 
RACESIMP<-dd%>%
  filter(variable=="RACESIMPLE")

RACESIMP<-RACESIMP[-1, 2:3]
colnames(RACESIMP)<-c("RACESIMPLE", "RaceSimp")

## Martial Status (MARST) CODES
MARST<-dd%>%
  filter(variable=="MARST")

MARST<-MARST[-1, 2:3]
colnames(MARST)<-c("MARST", "Martial")

#### We might prefer to use the simplified MARST variable 
MARRSIMP<-dd%>%
  filter(variable=="MARRSIMPLE")

MARRSIMP<-MARRSIMP[-1, 2:3]
colnames(MARRSIMP)<-c("MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT<-dd%>%
  filter(variable=="VETSTAT")

VETSTAT<-VETSTAT[-1, 2:3]
colnames(VETSTAT)<-c("VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN<-dd%>%
  filter(variable=="CITIZEN")

CITIZEN<-CITIZEN[-1, 2:3]
colnames(CITIZEN)<-c("CITIZEN", "Citizen")

## HISPAN CODES
HISPAN<-dd%>%
  filter(variable=="HISPAN")

HISPAN<-HISPAN[-1, 2:3]
colnames(HISPAN)<-c("HISPAN", "Hispanic")

#### We might prefer to use the simplified HISPAN variable 
HISPSIMP<-dd%>%
  filter(variable=="HISPSIMPLE")

HISPSIMP<-HISPSIMP[-1, 2:3]
colnames(HISPSIMP)<-c("HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE<-dd%>%
  filter(variable=="LABFORCE")

LABFORCE<-LABFORCE[-1, 2:3]
colnames(LABFORCE)<-c("LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99<-dd%>%
  filter(variable=="EDUC99")

EDUC99<-EDUC99[-1, 2:3]
colnames(EDUC99)<-c("EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC<-dd%>%
  filter(variable=="EDCYC")

EDCYC<-EDCYC[-1, 2:3]
colnames(EDCYC)<-c("EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED<-dd%>%
  filter(variable=="EDDIPGED")

EDDIPGED<-EDDIPGED[-1, 2:3]
colnames(EDDIPGED)<-c("EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED<-dd%>%
  filter(variable=="EDHGCGED")

EDHGCGED<-EDHGCGED[-1, 2:3]
colnames(EDHGCGED)<-c("EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable 
EDUSIMPLE<-dd%>%
  filter(variable=="EDUSIMPLE")

EDUSIMPLE<-EDUSIMPLE[-1, 2:3]
colnames(EDUSIMPLE)<-c("EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL<-dd%>%
  filter(variable=="SCHLCOLL")

SCHLCOLL<-SCHLCOLL[-1, 2:3]
colnames(SCHLCOLL)<-c("SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT<-dd%>%
  filter(variable=="VOWHYNOT")

VOWHYNOT<-VOWHYNOT[-1, 2:3]
colnames(VOWHYNOT)<-c("VOWHYNOT", "WhyNotVote")
VOWHYNOT[8,2]<-"Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG<-dd%>%
  filter(variable=="VOYNOTREG")

VOYNOTREG<-VOYNOTREG[-1, 2:3]
colnames(VOYNOTREG)<-c("VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW<-dd%>%
  filter(variable=="VOTEHOW")

VOTEHOW<-VOTEHOW[-1, 2:3]
colnames(VOTEHOW)<-c("VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN<-dd%>%
  filter(variable=="VOTEWHEN")

VOTEWHEN<-VOTEWHEN[-1, 2:3]
colnames(VOTEWHEN)<-c("VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW<-dd%>%
  filter(variable=="VOREGHOW")

VOREGHOW<-VOREGHOW[-1, 2:3]
colnames(VOREGHOW)<-c("VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED<-dd%>%
  filter(variable=="VOTED")

VOTED<-VOTED[-1, 2:3]
colnames(VOTED)<-c("VOTED", "Voted")

## Registered for the most recent November election
VOREG<-dd%>%
  filter(variable=="VOREG")

VOREG<-VOREG[-1, 2:3]
colnames(VOREG)<-c("VOREG", "Registered")

Join Labels

##### Select Columns and join for labels
trim1618<-data1618%>%
  select(YEAR, STATEFIP, METRO, AGE, SEX, 
         RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN, 
         HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL, 
         VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN, 
         VOREGHOW, VOTED, VOREG, VOSUPPWT)%>%
  left_join(STATEFIP)%>%
  left_join(METRO)%>%
  left_join(RACESIMP)%>%
  left_join(MARRSIMP)%>%
  left_join(VETSTAT)%>%
  left_join(CITIZEN)%>%
  left_join(HISPSIMP)%>%
  left_join(LABFORCE)%>%
  left_join(EDUSIMPLE)%>%
  left_join(SCHLCOLL)%>%
  left_join(VOWHYNOT)%>%
  left_join(VOYNOTREG)%>%
  left_join(VOTEHOW)%>%
  left_join(VOTEWHEN)%>%
  left_join(VOREGHOW)%>%
  left_join(VOTED)%>%
  left_join(VOREG)

## Joining, by = "STATEFIP"

## Joining, by = "METRO"

## Joining, by = "RACESIMPLE"

## Joining, by = "MARRSIMPLE"

## Joining, by = "VETSTAT"

## Joining, by = "CITIZEN"

## Joining, by = "HISPSIMPLE"

## Joining, by = "LABFORCE"

## Joining, by = "EDUSIMPLE"

## Joining, by = "SCHLCOLL"

## Joining, by = "VOWHYNOT"

## Joining, by = "VOYNOTREG"

## Joining, by = "VOTEHOW"

## Joining, by = "VOTEWHEN"

## Joining, by = "VOREGHOW"

## Joining, by = "VOTED"

## Joining, by = "VOREG"

Some Data Viz

Race and Voter Turn-out

# RACE and VOTED
ggplot(trim1618, aes(Voted, fill=RaceSimp))+
  geom_bar(position = "fill")+
  facet_grid(.~YEAR)

ggplot(trim1618, aes(Voted, fill=RaceSimp))+
  geom_bar()+
  facet_grid(.~YEAR)

Education and Voter Turn-out

# The higher the education the more likely to vote
ggplot(trim1618, aes(YEAR, fill=Voted))+
  geom_bar(position = "fill")+
  facet_grid(.~EDUSIMPLE)

Marital Status and Voter Turn-out

## MARRIED 
# more likely to vote if married 
ggplot(trim1618, aes(YEAR, fill=Voted))+
  geom_bar(position = "fill")+
  facet_grid(.~MARRSIMPLE)

Reasons why people dont vote

## REASONS FOR NOT VOTING
sumWhy<-trim1618%>%
  filter(WhyNotVote!="NIU")%>%
  group_by(YEAR, WhyNotVote)%>%
  summarise(n=n())

## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)

ggplot(sumWhy, aes(x=reorder(WhyNotVote, n), y=n, fill=as.factor(YEAR)))+
  geom_bar(stat="identity", position="dodge2")+
  #facet_grid(.~YEAR)+
  coord_flip()+ 
  #theme(legend.position = "none")+
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank())+
  ggtitle("Why don't people vote?")

Maps

Voter turn-out by state:

#install.packages("usmap")
library(usmap)

## Warning: package 'usmap' was built under R version 3.6.2

#install.packages("plotly")
library(plotly)

## Warning: package 'plotly' was built under R version 3.6.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

#install.packages("viridis")
library(viridis)

## Loading required package: viridisLite

states <- usmap::us_map()

state18<-trim1618%>%
  group_by(YEAR, State, Voted)%>%
  summarise(nVote=n(), 
            nWgtVote=sum(VOSUPPWT, na.rm=TRUE))

## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)

#head(state18)

state18T<-trim1618%>%
  group_by(YEAR, State)%>%
  summarise(n=n(), 
            nWgt=sum(VOSUPPWT, na.rm=TRUE))

## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)

statePropVote<-state18%>%
  filter(Voted=="Voted")%>%
  left_join(state18T)%>%
  mutate(sampPropVote=nVote/n, 
         wgtPropVote=nWgtVote/nWgt)

## Joining, by = c("YEAR", "State")

#hist(statePropVote$wgtPropVote)

mapPropVote<-states%>%
  mutate(State=full)%>%
  left_join(statePropVote)

## Joining, by = "State"

this.year=2016

p<-mapPropVote%>%
  filter(YEAR==this.year)%>%
  ggplot(aes(x, y, group = group)) +
  geom_polygon(aes(text=State, fill = wgtPropVote),color="black")+
  theme_bw()+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  labs(x="", y = "", 
       caption = "(Based on data from IPUMS)", 
       fill = 'Percent',
       title=paste( this.year, " Voter Turn-out"))+
  scale_fill_viridis(option="magma", direction = -1)

## Warning: Ignoring unknown aesthetics: text

ggplotly(p)

## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Compare vote turn-out accross 20016 and 2018

#Facet to maintain comparisons

p<-mapPropVote%>%
  ggplot(aes(x, y, group = group)) +
  geom_polygon(aes(text=State, fill = wgtPropVote),color="black")+
  theme_bw()+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  labs(x="", y = "", 
       caption = "(Based on data from IPUMS)", 
       fill = 'Percent',
       title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
  facet_grid(.~YEAR)+
  scale_fill_viridis(option="magma", direction = -1)

## Warning: Ignoring unknown aesthetics: text

ggplotly(p)

### Slightly different colors

p<-mapPropVote%>%
  ggplot(aes(x, y, group = group)) +
  geom_polygon(aes(text=State, fill = wgtPropVote),color="black")+
  theme_bw()+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  labs(x="", y = "", 
       caption = "(Based on data from IPUMS)", 
       fill = 'Percent',
       title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
  facet_grid(.~YEAR)+
  scale_fill_viridis(direction = -1)

## Warning: Ignoring unknown aesthetics: text

ggplotly(p)

Methods of voting: Mail-in vs In-person

Mail-in Voting is more popular in the West Coast

method<-trim1618%>%
  filter(!MethodVote %in% c("Don't know", "NIU", "Refused", "No Response"))%>%
  group_by(YEAR, State, MethodVote)%>%
  summarise(nVoteM=n(), 
            nWgtVoteM=sum(VOSUPPWT, na.rm=TRUE))%>%
  mutate(state=State)

## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)

unique(method$MethodVote)

## [1] "By mail"   "In person"

#install.packages("fivethirtyeight")
#install.packages("gridExtra")

library(fivethirtyeight)

## Warning: package 'fivethirtyeight' was built under R version 3.6.2

## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

#install.packages("geofacet")
library(geofacet)

## Warning: package 'geofacet' was built under R version 3.6.2

method%>%
  filter(YEAR==2016)%>%
  ggplot(aes(x=1, y=nWgtVoteM, fill = MethodVote)) +
  geom_col(position="fill") +
  #coord_flip() +
  facet_geo(~ state) +
  theme_bw()+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  labs(x="", y = "", 
       caption = "Voting by mail is more popular in the West Coast (Based on data from IPUMS)", 
       fill = 'Method of Voting',
       title=paste("Most Popular Method of Voting in 2016"))

Voting Method Related to Voter Turn-out

methodT<-method%>%
  group_by(YEAR, State, state)%>%
  summarise(nVoteTot=sum(nVoteM), 
            wgtVoteTot=sum(nWgtVoteM))%>%
  left_join(method)%>%
  mutate(pctMethodVote=nVoteM/nVoteTot,
         wgtPctMethodVote=nWgtVoteM/wgtVoteTot)

## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)

## Joining, by = c("YEAR", "State", "state")

mailIn<-methodT%>%
  filter(MethodVote=="By mail")%>%
  left_join(statePropVote)

## Joining, by = c("YEAR", "State")

#head(mailIn)

p<-mailIn%>%
  filter(YEAR==2016)%>%
  ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
  geom_point(aes(text=State), alpha=.7)+
  geom_smooth(method="lm", se=FALSE)+
  theme_minimal()+
  labs(x="Percent Mail-in Votes (Weighted)", y = "Voter Turn-out (Weighted)", 
       caption = "(2016 Election 2016 Based on data from IPUMS)", 
       color = 'Population',
       title="Increased Mail-in Votes Related to Higher Voter Turn-out")+
  scale_color_viridis(direction = -1)+
  scale_size(trans="sqrt", range=c(0.1, 7))

## Warning: Ignoring unknown aesthetics: text

ggplotly(p, tooltip = "text")

## `geom_smooth()` using formula 'y ~ x'

## Fit Some Models

Keep in mind that we should use the survey weights provided.

Chi-Squared Tests

## NEED TO ACCOUNT FOR WEIGHTING
#install.packages("questionr")
library(questionr)

## Warning: package 'questionr' was built under R version 3.6.2

### VOTED VS EDUSIMP
str(trim1618)

## 'data.frame':    152824 obs. of  38 variables:
##  $ YEAR        : int  2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
##  $ STATEFIP    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ METRO       : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ AGE         : int  70 63 59 79 57 80 85 47 76 24 ...
##  $ SEX         : int  1 2 1 2 2 1 2 1 2 1 ...
##  $ RACESIMPLE  : int  1 1 2 2 2 1 1 1 1 2 ...
##  $ MARRSIMPLE  : int  1 1 2 3 3 1 1 3 1 3 ...
##  $ VETSTAT     : int  2 1 1 1 1 2 1 1 1 1 ...
##  $ CITIZEN     : int  1 1 1 1 1 4 1 1 1 1 ...
##  $ HISPSIMPLE  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ LABFORCE    : int  2 1 2 1 1 1 1 2 1 2 ...
##  $ EDUSIMPLE   : int  4 3 3 1 1 5 3 2 5 3 ...
##  $ SCHLCOLL    : int  0 0 0 0 0 0 0 5 0 3 ...
##  $ VOWHYNOT    : int  99 99 5 99 99 99 99 99 99 7 ...
##  $ VOYNOTREG   : int  99 6 99 97 97 99 99 4 99 99 ...
##  $ VOTEHOW     : int  1 99 99 99 99 1 1 99 1 99 ...
##  $ VOTEWHEN    : int  1 99 99 99 99 1 1 99 1 99 ...
##  $ VOREGHOW    : int  5 99 5 99 99 97 97 99 5 97 ...
##  $ VOTED       : int  2 1 1 1 1 2 2 1 2 1 ...
##  $ VOREG       : int  99 1 2 1 1 99 99 1 99 2 ...
##  $ VOSUPPWT    : num  1879 1519 1675 1500 1413 ...
##  $ State       : chr  "Alabama" "Alabama" "Alabama" "Alabama" ...
##  $ Metro       : chr  "Central city" "Central city" "Central city" "Central city" ...
##  $ RaceSimp    : chr  "White" "White" "Black" "Black" ...
##  $ MartialSimp : chr  "Married" "Married" "Divorced or Widowed" "Single or Never Married" ...
##  $ Vet         : chr  "Yes" "No service" "No service" "No service" ...
##  $ Citizen     : chr  "Born in U.S" "Born in U.S" "Born in U.S" "Born in U.S" ...
##  $ HispanSimp  : chr  "Not Hispanic" "Not Hispanic" "Not Hispanic" "Not Hispanic" ...
##  $ Labor       : chr  "Yes, in the labor force" "No, not in the labor force" "Yes, in the labor force" "No, not in the labor force" ...
##  $ EduSimp     : chr  "Associate degree" "Some college but no degree" "Some college but no degree" "Some school but no diploma" ...
##  $ SchoolAttend: chr  "NIU" "NIU" "NIU" "NIU" ...
##  $ WhyNotVote  : chr  "NIU" "NIU" "Too busy, conflicting work or school schedule" "NIU" ...
##  $ WhyNotReg   : chr  "NIU" "Not interested in the election or not involved in politics" "NIU" "Don't know" ...
##  $ MethodVote  : chr  "In person" "NIU" "NIU" "NIU" ...
##  $ VoteWhen    : chr  "On election day" "NIU" "NIU" "NIU" ...
##  $ MethodReg   : chr  "Went to a town hall or county/government registration office" "NIU" "Went to a town hall or county/government registration office" "NIU" ...
##  $ Voted       : chr  "Voted" "Did not vote" "Did not vote" "Did not vote" ...
##  $ Registered  : chr  "Not in universe" "Did not register" "Registered" "Did not register" ...

unique(trim1618$Voted)

## [1] "Voted"        "Did not vote"

tbl = wtd.table(trim1618$Voted, trim1618$EduSimp, weights=trim1618$VOSUPPWT) 
tbl

##              Associate degree Bachelors degree High school graduate or GED
## Did not vote       10336346.3       14258097.4                  44499001.9
## Voted              28466023.8       67231104.4                  61551938.3
##              Masters degree  No school Professional or Doctoral degree
## Did not vote      4034243.4   409580.7                       1281276.8
## Voted            29884542.5   156548.2                      10435339.2
##              Some college but no degree Some school but no diploma
## Did not vote                 22710479.7                 17324670.5
## Voted                        50096529.2                 11995158.4

chisq.test(tbl)

## 
##  Pearson's Chi-squared test
## 
## data:  tbl
## X-squared = 32690751, df = 7, p-value < 2.2e-16

### VOTED VS RACESIMP
tbl2 = wtd.table(trim1618$Voted, trim1618$RaceSimp, weights=trim1618$VOSUPPWT) 
tbl2

##              American Indian or Aleut or Eskimo Asian or Pacific Islander
## Did not vote                            1999820                   7073032
## Voted                                   2098503                  10128785
##                  Black More than one race     White
## Did not vote  13803392            2624594  89352859
## Voted         32313137            4130562 211146197

chisq.test(tbl2)

## 
##  Pearson's Chi-squared test
## 
## data:  tbl2
## X-squared = 1864975, df = 4, p-value < 2.2e-16

### VOTED VS METRO
tbl3 = wtd.table(trim1618$Voted, trim1618$Metro, weights=trim1618$VOSUPPWT) 
tbl3

##              Central city Central city status unknown Not identifiable
## Did not vote     29792543                    17160529          1108273
## Voted            68554942                    34270899          2099900
##              Not in metro area Outside central city
## Did not vote          18264849             48527502
## Voted                 35185564            119705880

chisq.test(tbl3)

## 
##  Pearson's Chi-squared test
## 
## data:  tbl3
## X-squared = 776810, df = 4, p-value < 2.2e-16

### VOTED VS MARTIALSIMP
tbl4 = wtd.table(trim1618$Voted, trim1618$MartialSimp, weights=trim1618$VOSUPPWT) 
tbl4

##              Divorced or Widowed   Married Single or Never Married
## Did not vote            21431018  49993242                43429437
## Voted                   44570418 157602176                57644589

chisq.test(tbl4)

## 
##  Pearson's Chi-squared test
## 
## data:  tbl4
## X-squared = 11530088, df = 2, p-value < 2.2e-16

### VOTED VS VET
tbl5 = wtd.table(trim1618$Voted, trim1618$Vet, weights=trim1618$VOSUPPWT) 
tbl5

##              No service       Yes
## Did not vote  107821916   7031781
## Voted         235033022  24784162

chisq.test(tbl5)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tbl5
## X-squared = 1196521, df = 1, p-value < 2.2e-16

### VOTED VS CITIZEN
tbl6 = wtd.table(trim1618$Voted, trim1618$Citizen, weights=trim1618$VOSUPPWT) 
tbl6

##              Born abroad of American parents Born in U.S Born in U.S. outlying
## Did not vote                         1064127   100237770               1229508
## Voted                                2307519   235363239               1394888
##              Naturalized citizen
## Did not vote            12322292
## Voted                   20751538

chisq.test(tbl5)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tbl5
## X-squared = 1196521, df = 1, p-value < 2.2e-16

### VOTED VS Labor
tbl7 = wtd.table(trim1618$Voted, trim1618$Labor, weights=trim1618$VOSUPPWT) 
tbl7

##              No, not in the labor force Yes, in the labor force
## Did not vote                   42247712                72605984
## Voted                          89934150               169883034

chisq.test(tbl7)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tbl7
## X-squared = 164185, df = 1, p-value < 2.2e-16

Logistic Regression

Age is a significant predictor

ggplot(trim1618, aes(x=AGE, fill=Voted))+
  geom_boxplot()

qplot(x=AGE, y=VOTED, data = trim1618, 
      geom = "point", alpha = I(.1), ylab = "Vote") +
  stat_smooth(method = "glm", method.args = list(family = "binomial"),
              se = FALSE)

## `geom_smooth()` using formula 'y ~ x'

## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1

m1 <- glm(as.factor(Voted) ~ AGE, data = trim1618, family = "binomial")
summary(m1)

## 
## Call:
## glm(formula = as.factor(Voted) ~ AGE, family = "binomial", data = trim1618)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9493  -1.2968   0.7173   0.8747   1.1328  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.3325070  0.0161454  -20.59   <2e-16 ***
## AGE          0.0243576  0.0003231   75.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186890  on 152823  degrees of freedom
## Residual deviance: 180929  on 152822  degrees of freedom
## AIC: 180933
## 
## Number of Fisher Scoring iterations: 4

Perhaps try some model selection on the full model

m2 <- glm(as.factor(Voted) ~ AGE+as.factor(SEX)+Metro+RaceSimp+
            Vet+Citizen+Labor+EduSimp, data = trim1618, family = "binomial")
summary(m2)

## 
## Call:
## glm(formula = as.factor(Voted) ~ AGE + as.factor(SEX) + Metro + 
##     RaceSimp + Vet + Citizen + Labor + EduSimp, family = "binomial", 
##     data = trim1618)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6034  -1.0212   0.5681   0.8140   2.2363  
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.228732   0.086803 -14.155  < 2e-16
## AGE                                     0.031703   0.000388  81.699  < 2e-16
## as.factor(SEX)2                         0.117764   0.012720   9.258  < 2e-16
## MetroCentral city status unknown       -0.130950   0.019700  -6.647 2.99e-11
## MetroNot identifiable                  -0.105859   0.059319  -1.785 0.074330
## MetroNot in metro area                 -0.090859   0.019002  -4.782 1.74e-06
## MetroOutside central city               0.001972   0.016463   0.120 0.904652
## RaceSimpAsian or Pacific Islander      -0.108298   0.058756  -1.843 0.065305
## RaceSimpBlack                           0.738456   0.053357  13.840  < 2e-16
## RaceSimpMore than one race              0.338411   0.067257   5.032 4.86e-07
## RaceSimpWhite                           0.437931   0.050100   8.741  < 2e-16
## VetYes                                  0.088528   0.023876   3.708 0.000209
## CitizenBorn in U.S                      0.101335   0.064150   1.580 0.114183
## CitizenBorn in U.S. outlying           -0.357707   0.100756  -3.550 0.000385
## CitizenNaturalized citizen             -0.268012   0.067519  -3.969 7.21e-05
## LaborYes, in the labor force            0.307313   0.014359  21.402  < 2e-16
## EduSimpBachelors degree                 0.615521   0.023892  25.762  < 2e-16
## EduSimpHigh school graduate or GED     -0.729573   0.021088 -34.597  < 2e-16
## EduSimpMasters degree                   0.928978   0.032670  28.435  < 2e-16
## EduSimpNo school                       -2.038980   0.160287 -12.721  < 2e-16
## EduSimpProfessional or Doctoral degree  1.039923   0.050507  20.590  < 2e-16
## EduSimpSome college but no degree      -0.096620   0.022770  -4.243 2.20e-05
## EduSimpSome school but no diploma      -1.498158   0.027548 -54.383  < 2e-16
##                                           
## (Intercept)                            ***
## AGE                                    ***
## as.factor(SEX)2                        ***
## MetroCentral city status unknown       ***
## MetroNot identifiable                  .  
## MetroNot in metro area                 ***
## MetroOutside central city                 
## RaceSimpAsian or Pacific Islander      .  
## RaceSimpBlack                          ***
## RaceSimpMore than one race             ***
## RaceSimpWhite                          ***
## VetYes                                 ***
## CitizenBorn in U.S                        
## CitizenBorn in U.S. outlying           ***
## CitizenNaturalized citizen             ***
## LaborYes, in the labor force           ***
## EduSimpBachelors degree                ***
## EduSimpHigh school graduate or GED     ***
## EduSimpMasters degree                  ***
## EduSimpNo school                       ***
## EduSimpProfessional or Doctoral degree ***
## EduSimpSome college but no degree      ***
## EduSimpSome school but no diploma      ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186890  on 152823  degrees of freedom
## Residual deviance: 164164  on 152801  degrees of freedom
## AIC: 164210
## 
## Number of Fisher Scoring iterations: 4

FALL 2020 Data Challenge: Voter Turn-out

Willamette University - Statistics

Download the data

Variable Labels

Join Labels

Some Data Viz

Race and Voter Turn-out

Education and Voter Turn-out

Marital Status and Voter Turn-out

Reasons why people dont vote

Maps

Voter turn-out by state:

Compare vote turn-out accross 20016 and 2018

Methods of voting: Mail-in vs In-person

Mail-in Voting is more popular in the West Coast

Voting Method Related to Voter Turn-out

Chi-Squared Tests

Logistic Regression

Age is a significant predictor

Perhaps try some model selection on the full model