library(tidyverse)
## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(geofacet)
library(usmap)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(viridis)
## Loading required package: viridisLite
data1618 <- read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
                   header=TRUE)
dd <- read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
             header=TRUE, 
             stringsAsFactors = FALSE)
STATEFIP<-dd%>%
  filter(variable=="STATEFIP")

STATEFIP<-STATEFIP[-1, 2:3]
colnames(STATEFIP)<-c("STATEFIP", "State")

METRO<-dd%>%
  filter(variable=="METRO")

METRO<-METRO[-1, 2:3]
colnames(METRO)<-c("METRO", "Metro")
View(METRO)
RACE<-dd%>%
  filter(variable=="RACE")

RACE<-RACE[-1, 2:3]
colnames(RACE)<-c("RACE", "Race")

#### We might prefer to use the simplified RACE variable 
RACESIMP<-dd%>%
  filter(variable=="RACESIMPLE")

RACESIMP<-RACESIMP[-1, 2:3]
colnames(RACESIMP)<-c("RACESIMPLE", "RaceSimp")

## Martial Status (MARST) CODES
MARST<-dd%>%
  filter(variable=="MARST")

MARST<-MARST[-1, 2:3]
colnames(MARST)<-c("MARST", "Martial")

#### We might prefer to use the simplified MARST variable 
MARRSIMP<-dd%>%
  filter(variable=="MARRSIMPLE")

MARRSIMP<-MARRSIMP[-1, 2:3]
colnames(MARRSIMP)<-c("MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT<-dd%>%
  filter(variable=="VETSTAT")

VETSTAT<-VETSTAT[-1, 2:3]
colnames(VETSTAT)<-c("VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN<-dd%>%
  filter(variable=="CITIZEN")

CITIZEN<-CITIZEN[-1, 2:3]
colnames(CITIZEN)<-c("CITIZEN", "Citizen")

## HISPAN CODES
HISPAN<-dd%>%
  filter(variable=="HISPAN")

HISPAN<-HISPAN[-1, 2:3]
colnames(HISPAN)<-c("HISPAN", "Hispanic")

#### We might prefer to use the simplified HISPAN variable 
HISPSIMP<-dd%>%
  filter(variable=="HISPSIMPLE")

HISPSIMP<-HISPSIMP[-1, 2:3]
colnames(HISPSIMP)<-c("HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE<-dd%>%
  filter(variable=="LABFORCE")

LABFORCE<-LABFORCE[-1, 2:3]
colnames(LABFORCE)<-c("LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99<-dd%>%
  filter(variable=="EDUC99")

EDUC99<-EDUC99[-1, 2:3]
colnames(EDUC99)<-c("EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC<-dd%>%
  filter(variable=="EDCYC")

EDCYC<-EDCYC[-1, 2:3]
colnames(EDCYC)<-c("EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED<-dd%>%
  filter(variable=="EDDIPGED")

EDDIPGED<-EDDIPGED[-1, 2:3]
colnames(EDDIPGED)<-c("EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED<-dd%>%
  filter(variable=="EDHGCGED")

EDHGCGED<-EDHGCGED[-1, 2:3]
colnames(EDHGCGED)<-c("EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable 
EDUSIMPLE<-dd%>%
  filter(variable=="EDUSIMPLE")

EDUSIMPLE<-EDUSIMPLE[-1, 2:3]
colnames(EDUSIMPLE)<-c("EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL<-dd%>%
  filter(variable=="SCHLCOLL")

SCHLCOLL<-SCHLCOLL[-1, 2:3]
colnames(SCHLCOLL)<-c("SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT<-dd%>%
  filter(variable=="VOWHYNOT")

VOWHYNOT<-VOWHYNOT[-1, 2:3]
colnames(VOWHYNOT)<-c("VOWHYNOT", "WhyNotVote")
VOWHYNOT[8,2]<-"Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG<-dd%>%
  filter(variable=="VOYNOTREG")

VOYNOTREG<-VOYNOTREG[-1, 2:3]
colnames(VOYNOTREG)<-c("VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW<-dd%>%
  filter(variable=="VOTEHOW")

VOTEHOW<-VOTEHOW[-1, 2:3]
colnames(VOTEHOW)<-c("VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN<-dd%>%
  filter(variable=="VOTEWHEN")

VOTEWHEN<-VOTEWHEN[-1, 2:3]
colnames(VOTEWHEN)<-c("VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW<-dd%>%
  filter(variable=="VOREGHOW")

VOREGHOW<-VOREGHOW[-1, 2:3]
colnames(VOREGHOW)<-c("VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED<-dd%>%
  filter(variable=="VOTED")

VOTED<-VOTED[-1, 2:3]
colnames(VOTED)<-c("VOTED", "Voted")

## Registered for the most recent November election
VOREG<-dd%>%
  filter(variable=="VOREG")

VOREG<-VOREG[-1, 2:3]
colnames(VOREG)<-c("VOREG", "Registered")
trim1618<-data1618%>%
  select(YEAR, STATEFIP, METRO, AGE, SEX, 
         RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN, 
         HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL, 
         VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN, 
         VOREGHOW, VOTED, VOREG, VOSUPPWT)%>%
  left_join(STATEFIP)%>%
  left_join(METRO)%>%
  left_join(RACESIMP)%>%
  left_join(MARRSIMP)%>%
  left_join(VETSTAT)%>%
  left_join(CITIZEN)%>%
  left_join(HISPSIMP)%>%
  left_join(LABFORCE)%>%
  left_join(EDUSIMPLE)%>%
  left_join(SCHLCOLL)%>%
  left_join(VOWHYNOT)%>%
  left_join(VOYNOTREG)%>%
  left_join(VOTEHOW)%>%
  left_join(VOTEWHEN)%>%
  left_join(VOREGHOW)%>%
  left_join(VOTED)%>%
  left_join(VOREG)
## Joining, by = "STATEFIP"
## Joining, by = "METRO"
## Joining, by = "RACESIMPLE"
## Joining, by = "MARRSIMPLE"
## Joining, by = "VETSTAT"
## Joining, by = "CITIZEN"
## Joining, by = "HISPSIMPLE"
## Joining, by = "LABFORCE"
## Joining, by = "EDUSIMPLE"
## Joining, by = "SCHLCOLL"
## Joining, by = "VOWHYNOT"
## Joining, by = "VOYNOTREG"
## Joining, by = "VOTEHOW"
## Joining, by = "VOTEWHEN"
## Joining, by = "VOREGHOW"
## Joining, by = "VOTED"
## Joining, by = "VOREG"

###Why dont people vote?

sumWhy<-trim1618%>%
  filter(WhyNotVote!="NIU")%>%
  group_by(YEAR, WhyNotVote)%>%
  summarise(n=n())
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
ggplot(sumWhy, aes(x=reorder(WhyNotVote, n), y=n, fill=as.factor(YEAR)))+
  geom_bar(stat="identity", position="dodge2")+
  #facet_grid(.~YEAR)+
  coord_flip()+ 
  #theme(legend.position = "none")+
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank())+
  ggtitle("Why don't people vote?")

We can see that the main reason why people dont vote is because they are too busy. We should make it more accessible to vote. Also, what is interesting is the second biggest reason why people dont vote is because they feel as if they wouldnt make a difference. After the 2020 election, we have seen that voting DOES make a difference!

Voter Turn Out

states <- usmap::us_map()

state18<-trim1618%>%
  group_by(YEAR, State, Voted)%>%
  summarise(nVote=n(), 
            nWgtVote=sum(VOSUPPWT, na.rm=TRUE))
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
state18T<-trim1618%>%
  group_by(YEAR, State)%>%
  summarise(n=n(), 
            nWgt=sum(VOSUPPWT, na.rm=TRUE))
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
statePropVote<-state18%>%
  filter(Voted=="Voted")%>%
  left_join(state18T)%>%
  mutate(sampPropVote=nVote/n, 
         wgtPropVote=nWgtVote/nWgt)
## Joining, by = c("YEAR", "State")
mapPropVote<-states%>%
  mutate(State=full)%>%
  left_join(statePropVote)
## Joining, by = "State"
p<-mapPropVote%>%
  ggplot(aes(x, y, group = group)) +
  geom_polygon(aes(text=State, fill = wgtPropVote),color="black")+
  theme_bw()+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  labs(x="", y = "", 
       caption = "(Based on data from IPUMS)", 
       fill = 'Percent',
       title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
  facet_grid(.~YEAR)+
  scale_fill_viridis(direction = -1)
## Warning: Ignoring unknown aesthetics: text

Voter turn out by state in the two different years

ggplotly(p)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

It shows that in 2018, the voting population in the more lighter states has gone down.

How people voted vs their race

ggplot(trim1618, aes(VoteWhen, fill=RaceSimp))+
  geom_bar(position = "fill")+
  facet_grid(.~YEAR)

### Voter turn out vs Race

ggplot(trim1618, aes(Voted, fill=RaceSimp))+
  geom_bar(position = "fill")+
  facet_grid(.~YEAR)

From this graph, it is evident that people who are not white, dont vote as much. We should encourage these groups to vote more.

Voter turn out vs Education

trim1618$EduSimp<-factor(trim1618$EduSimp, levels =c("No school",                                                    "Some school but no diploma",                                                    "High school graduate or GED",                                                    "Some college but no degree",                                                  "Associate degree",                                              "Bachelors degree",                                              "Masters degree",                                            "Professional or Doctoral degree"))
ggplot(trim1618, aes(Voted, fill=EduSimp))+ geom_bar()+  facet_grid(.~YEAR)

From this graph we can see that people with higher education vote more. We can target people with lower education statuses to vote.

How they voted vs Education

ggplot(trim1618, aes(VoteWhen, fill=EduSimp))+
  geom_bar(position = "fill")+
  facet_grid(.~YEAR)

We need to encourage more to vote before election day

###Voter turnout vs Age

ggplot(trim1618, aes(x=AGE, fill=Voted))+
  geom_boxplot()

Older people tend to vote. We should be targeting colleges an encourage them to vote. One way to do this is to have a voting registration booth so students who are unregistered can register to vote.

Voter turn out vs Marriage Status

ggplot(trim1618, aes(x=MartialSimp, fill=Voted))+
  geom_bar()

` ###When people voted vs Age

ggplot(trim1618, aes(AGE, fill=VoteWhen))+
  geom_bar(position = "fill")+
  facet_grid(.~YEAR)

GLM of if people voted as a function of all the variables

m2 <- glm(as.factor(Voted) ~ AGE+as.factor(SEX)+Metro+RaceSimp+
            Vet+Citizen+Labor+EduSimp, data = trim1618, family = "binomial")
summary(m2)
## 
## Call:
## glm(formula = as.factor(Voted) ~ AGE + as.factor(SEX) + Metro + 
##     RaceSimp + Vet + Citizen + Labor + EduSimp, family = "binomial", 
##     data = trim1618)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6034  -1.0212   0.5681   0.8140   2.2363  
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -3.267712   0.179559 -18.199  < 2e-16
## AGE                                     0.031703   0.000388  81.699  < 2e-16
## as.factor(SEX)2                         0.117764   0.012720   9.258  < 2e-16
## MetroCentral city status unknown       -0.130950   0.019700  -6.647 2.99e-11
## MetroNot identifiable                  -0.105859   0.059319  -1.785 0.074330
## MetroNot in metro area                 -0.090859   0.019002  -4.782 1.74e-06
## MetroOutside central city               0.001972   0.016463   0.120 0.904652
## RaceSimpAsian or Pacific Islander      -0.108298   0.058756  -1.843 0.065305
## RaceSimpBlack                           0.738456   0.053357  13.840  < 2e-16
## RaceSimpMore than one race              0.338411   0.067257   5.032 4.86e-07
## RaceSimpWhite                           0.437931   0.050100   8.741  < 2e-16
## VetYes                                  0.088528   0.023876   3.708 0.000209
## CitizenBorn in U.S                      0.101335   0.064150   1.580 0.114183
## CitizenBorn in U.S. outlying           -0.357707   0.100756  -3.550 0.000385
## CitizenNaturalized citizen             -0.268012   0.067519  -3.969 7.21e-05
## LaborYes, in the labor force            0.307313   0.014359  21.402  < 2e-16
## EduSimpSome school but no diploma       0.540822   0.160149   3.377 0.000733
## EduSimpHigh school graduate or GED      1.309406   0.159446   8.212  < 2e-16
## EduSimpSome college but no degree       1.942360   0.159777  12.157  < 2e-16
## EduSimpAssociate degree                 2.038980   0.160287  12.721  < 2e-16
## EduSimpBachelors degree                 2.654500   0.159937  16.597  < 2e-16
## EduSimpMasters degree                   2.967957   0.161445  18.384  < 2e-16
## EduSimpProfessional or Doctoral degree  3.078902   0.165914  18.557  < 2e-16
##                                           
## (Intercept)                            ***
## AGE                                    ***
## as.factor(SEX)2                        ***
## MetroCentral city status unknown       ***
## MetroNot identifiable                  .  
## MetroNot in metro area                 ***
## MetroOutside central city                 
## RaceSimpAsian or Pacific Islander      .  
## RaceSimpBlack                          ***
## RaceSimpMore than one race             ***
## RaceSimpWhite                          ***
## VetYes                                 ***
## CitizenBorn in U.S                        
## CitizenBorn in U.S. outlying           ***
## CitizenNaturalized citizen             ***
## LaborYes, in the labor force           ***
## EduSimpSome school but no diploma      ***
## EduSimpHigh school graduate or GED     ***
## EduSimpSome college but no degree      ***
## EduSimpAssociate degree                ***
## EduSimpBachelors degree                ***
## EduSimpMasters degree                  ***
## EduSimpProfessional or Doctoral degree ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186890  on 152823  degrees of freedom
## Residual deviance: 164164  on 152801  degrees of freedom
## AIC: 164210
## 
## Number of Fisher Scoring iterations: 4

From this general linear model, we can see that almost all variables are significant.

MAIL IN VOTES BY STATE in 2016

methodT<-method%>%
  group_by(YEAR, State, state)%>%
  summarise(nVoteTot=sum(nVoteM), 
            wgtVoteTot=sum(nWgtVoteM))%>%
  left_join(method)%>%
  mutate(pctMethodVote=nVoteM/nVoteTot,
         wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
mailIn<-methodT%>%
  filter(MethodVote=="By mail")%>%
  left_join(statePropVote)
## Joining, by = c("YEAR", "State")
p<-mailIn%>%
  filter(YEAR==2016)%>%
  ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
  geom_point(aes(text=State), alpha=.7)+
  geom_smooth(method="lm", se=FALSE)+
  theme_minimal()+
  labs(x="Percent Mail-in Votes (Weighted)", y = "Voter Turn-out (Weighted)", 
       caption = "(2016 Election 2016 Based on data from IPUMS)", 
       color = 'Population',
       title="Mail-in Votes Across the States in 2016")+
  scale_color_viridis(direction = -1)+
  scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(p, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'

### MAIL IN VOTES BY STATE in 2016

methodT<-method%>%
  group_by(YEAR, State, state)%>%
  summarise(nVoteTot=sum(nVoteM), 
            wgtVoteTot=sum(nWgtVoteM))%>%
  left_join(method)%>%
  mutate(pctMethodVote=nVoteM/nVoteTot,
         wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
mailIn<-methodT%>%
  filter(MethodVote=="By mail")%>%
  left_join(statePropVote)
## Joining, by = c("YEAR", "State")
w<-mailIn%>%
  filter(YEAR==2018)%>%
  ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
  geom_point(aes(text=State), alpha=.7)+
  geom_smooth(method="lm", se=FALSE)+
  theme_minimal()+
  labs(x="Percent Mail-in Votes (Weighted)", y = "Voter Turn-out (Weighted)", 
       caption = "(2016 Election 2016 Based on data from IPUMS)", 
       color = 'Population',
       title="Mail-in Votes Across the States in 2018")+
  scale_color_viridis(direction = -1)+
  scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(w, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'

In person voting by states in 2016

methodT<-method%>%
  group_by(YEAR, State, state)%>%
  summarise(nVoteTot=sum(nVoteM), 
            wgtVoteTot=sum(nWgtVoteM))%>%
  left_join(method)%>%
  mutate(pctMethodVote=nVoteM/nVoteTot,
         wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
InPerson<-methodT%>%
  filter(MethodVote=="In person")%>%
  left_join(statePropVote)
## Joining, by = c("YEAR", "State")
q<-InPerson%>%
  filter(YEAR==2016)%>%
  ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
  geom_point(aes(text=State), alpha=.7)+
  geom_smooth(method="lm", se=FALSE)+
  theme_minimal()+
  labs(x="Percent In Person Votes (Weighted)", y = "Voter Turn-out (Weighted)", 
       caption = "(2016 Election 2016 Based on data from IPUMS)", 
       color = 'Population',
       title="In Peson Votes in 2016")+
  scale_color_viridis(direction = -1)+
  scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(q, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'

### In person voting by states in 2018

methodT<-method%>%
  group_by(YEAR, State, state)%>%
  summarise(nVoteTot=sum(nVoteM), 
            wgtVoteTot=sum(nWgtVoteM))%>%
  left_join(method)%>%
  mutate(pctMethodVote=nVoteM/nVoteTot,
         wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
InPerson<-methodT%>%
  filter(MethodVote=="In person")%>%
  left_join(statePropVote)
## Joining, by = c("YEAR", "State")
g<-InPerson%>%
  filter(YEAR==2018)%>%
  ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
  geom_point(aes(text=State), alpha=.7)+
  geom_smooth(method="lm", se=FALSE)+
  theme_minimal()+
  labs(x="Percent In Person Votes (Weighted)", y = "Voter Turn-out (Weighted)", 
       caption = "(2016 Election 2016 Based on data from IPUMS)", 
       color = 'Population',
       title="In Peson Votes in 2018")+
  scale_color_viridis(direction = -1)+
  scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(g, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'