library(tidyverse)
## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(geofacet)
library(usmap)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(viridis)
## Loading required package: viridisLite
data1618 <- read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
header=TRUE)
dd <- read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
header=TRUE,
stringsAsFactors = FALSE)
STATEFIP<-dd%>%
filter(variable=="STATEFIP")
STATEFIP<-STATEFIP[-1, 2:3]
colnames(STATEFIP)<-c("STATEFIP", "State")
METRO<-dd%>%
filter(variable=="METRO")
METRO<-METRO[-1, 2:3]
colnames(METRO)<-c("METRO", "Metro")
View(METRO)
RACE<-dd%>%
filter(variable=="RACE")
RACE<-RACE[-1, 2:3]
colnames(RACE)<-c("RACE", "Race")
#### We might prefer to use the simplified RACE variable
RACESIMP<-dd%>%
filter(variable=="RACESIMPLE")
RACESIMP<-RACESIMP[-1, 2:3]
colnames(RACESIMP)<-c("RACESIMPLE", "RaceSimp")
## Martial Status (MARST) CODES
MARST<-dd%>%
filter(variable=="MARST")
MARST<-MARST[-1, 2:3]
colnames(MARST)<-c("MARST", "Martial")
#### We might prefer to use the simplified MARST variable
MARRSIMP<-dd%>%
filter(variable=="MARRSIMPLE")
MARRSIMP<-MARRSIMP[-1, 2:3]
colnames(MARRSIMP)<-c("MARRSIMPLE", "MartialSimp")
## VETSTAT CODES
VETSTAT<-dd%>%
filter(variable=="VETSTAT")
VETSTAT<-VETSTAT[-1, 2:3]
colnames(VETSTAT)<-c("VETSTAT", "Vet")
## CITIZEN CODES
CITIZEN<-dd%>%
filter(variable=="CITIZEN")
CITIZEN<-CITIZEN[-1, 2:3]
colnames(CITIZEN)<-c("CITIZEN", "Citizen")
## HISPAN CODES
HISPAN<-dd%>%
filter(variable=="HISPAN")
HISPAN<-HISPAN[-1, 2:3]
colnames(HISPAN)<-c("HISPAN", "Hispanic")
#### We might prefer to use the simplified HISPAN variable
HISPSIMP<-dd%>%
filter(variable=="HISPSIMPLE")
HISPSIMP<-HISPSIMP[-1, 2:3]
colnames(HISPSIMP)<-c("HISPSIMPLE", "HispanSimp")
## LABFORCE CODES
LABFORCE<-dd%>%
filter(variable=="LABFORCE")
LABFORCE<-LABFORCE[-1, 2:3]
colnames(LABFORCE)<-c("LABFORCE", "Labor")
## EDUC99 CODES (Education Attainment)
EDUC99<-dd%>%
filter(variable=="EDUC99")
EDUC99<-EDUC99[-1, 2:3]
colnames(EDUC99)<-c("EDUC99", "Edu1990")
## EDCYC CODES (Years of college credit)
EDCYC<-dd%>%
filter(variable=="EDCYC")
EDCYC<-EDCYC[-1, 2:3]
colnames(EDCYC)<-c("EDCYC", "College")
## EDDIPGED CODES (Highschool or GED)
EDDIPGED<-dd%>%
filter(variable=="EDDIPGED")
EDDIPGED<-EDDIPGED[-1, 2:3]
colnames(EDDIPGED)<-c("EDDIPGED", "HighGED")
## EDHGCGED CODES (Highest grade before GED)
EDHGCGED<-dd%>%
filter(variable=="EDHGCGED")
EDHGCGED<-EDHGCGED[-1, 2:3]
colnames(EDHGCGED)<-c("EDHGCGED", "HighestGrade")
#### We might prefer to use the simplified EDU variable
EDUSIMPLE<-dd%>%
filter(variable=="EDUSIMPLE")
EDUSIMPLE<-EDUSIMPLE[-1, 2:3]
colnames(EDUSIMPLE)<-c("EDUSIMPLE", "EduSimp")
## SCHLCOLL CODES (School or college attendance)
SCHLCOLL<-dd%>%
filter(variable=="SCHLCOLL")
SCHLCOLL<-SCHLCOLL[-1, 2:3]
colnames(SCHLCOLL)<-c("SCHLCOLL", "SchoolAttend")
## Reason why eligible voter did not vote
VOWHYNOT<-dd%>%
filter(variable=="VOWHYNOT")
VOWHYNOT<-VOWHYNOT[-1, 2:3]
colnames(VOWHYNOT)<-c("VOWHYNOT", "WhyNotVote")
VOWHYNOT[8,2]<-"Registration Problems"
## Reason why eligible voter did not register to vote
VOYNOTREG<-dd%>%
filter(variable=="VOYNOTREG")
VOYNOTREG<-VOYNOTREG[-1, 2:3]
colnames(VOYNOTREG)<-c("VOYNOTREG", "WhyNotReg")
## Method of voting in the most recent November election
VOTEHOW<-dd%>%
filter(variable=="VOTEHOW")
VOTEHOW<-VOTEHOW[-1, 2:3]
colnames(VOTEHOW)<-c("VOTEHOW", "MethodVote")
## Voted on or before election day
VOTEWHEN<-dd%>%
filter(variable=="VOTEWHEN")
VOTEWHEN<-VOTEWHEN[-1, 2:3]
colnames(VOTEWHEN)<-c("VOTEWHEN", "VoteWhen")
## Method of registering to vote
VOREGHOW<-dd%>%
filter(variable=="VOREGHOW")
VOREGHOW<-VOREGHOW[-1, 2:3]
colnames(VOREGHOW)<-c("VOREGHOW", "MethodReg")
## Voted for the most recent November election
VOTED<-dd%>%
filter(variable=="VOTED")
VOTED<-VOTED[-1, 2:3]
colnames(VOTED)<-c("VOTED", "Voted")
## Registered for the most recent November election
VOREG<-dd%>%
filter(variable=="VOREG")
VOREG<-VOREG[-1, 2:3]
colnames(VOREG)<-c("VOREG", "Registered")
trim1618<-data1618%>%
select(YEAR, STATEFIP, METRO, AGE, SEX,
RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN,
HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL,
VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
VOREGHOW, VOTED, VOREG, VOSUPPWT)%>%
left_join(STATEFIP)%>%
left_join(METRO)%>%
left_join(RACESIMP)%>%
left_join(MARRSIMP)%>%
left_join(VETSTAT)%>%
left_join(CITIZEN)%>%
left_join(HISPSIMP)%>%
left_join(LABFORCE)%>%
left_join(EDUSIMPLE)%>%
left_join(SCHLCOLL)%>%
left_join(VOWHYNOT)%>%
left_join(VOYNOTREG)%>%
left_join(VOTEHOW)%>%
left_join(VOTEWHEN)%>%
left_join(VOREGHOW)%>%
left_join(VOTED)%>%
left_join(VOREG)
## Joining, by = "STATEFIP"
## Joining, by = "METRO"
## Joining, by = "RACESIMPLE"
## Joining, by = "MARRSIMPLE"
## Joining, by = "VETSTAT"
## Joining, by = "CITIZEN"
## Joining, by = "HISPSIMPLE"
## Joining, by = "LABFORCE"
## Joining, by = "EDUSIMPLE"
## Joining, by = "SCHLCOLL"
## Joining, by = "VOWHYNOT"
## Joining, by = "VOYNOTREG"
## Joining, by = "VOTEHOW"
## Joining, by = "VOTEWHEN"
## Joining, by = "VOREGHOW"
## Joining, by = "VOTED"
## Joining, by = "VOREG"
###Why dont people vote?
sumWhy<-trim1618%>%
filter(WhyNotVote!="NIU")%>%
group_by(YEAR, WhyNotVote)%>%
summarise(n=n())
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
ggplot(sumWhy, aes(x=reorder(WhyNotVote, n), y=n, fill=as.factor(YEAR)))+
geom_bar(stat="identity", position="dodge2")+
#facet_grid(.~YEAR)+
coord_flip()+
#theme(legend.position = "none")+
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank())+
ggtitle("Why don't people vote?")
We can see that the main reason why people dont vote is because they are too busy. We should make it more accessible to vote. Also, what is interesting is the second biggest reason why people dont vote is because they feel as if they wouldnt make a difference. After the 2020 election, we have seen that voting DOES make a difference!
method<-trim1618%>%
filter(!MethodVote %in% c("Don't know", "NIU", "Refused", "No Response"))%>%
group_by(YEAR, State, MethodVote)%>%
summarise(nVoteM=n(),
nWgtVoteM=sum(VOSUPPWT, na.rm=TRUE))%>%
mutate(state=State)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
method%>%
filter(YEAR==2016)%>%
ggplot(aes(x=1, y=nWgtVoteM, fill = MethodVote)) +
geom_col(position="fill") +
#coord_flip() +
facet_geo(~ state) +
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
labs(x="", y = "",
caption = "Voting by mail is more popular in the West Coast (Based on data from IPUMS)",
fill = 'Method of Voting',
title=paste("Most Popular Method of Voting in 2016"))
method<-trim1618%>%
filter(!MethodVote %in% c("Don't know", "NIU", "Refused", "No Response"))%>%
group_by(YEAR, State, MethodVote)%>%
summarise(nVoteM=n(),
nWgtVoteM=sum(VOSUPPWT, na.rm=TRUE))%>%
mutate(state=State)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
method%>%
filter(YEAR==2018)%>%
ggplot(aes(x=1, y=nWgtVoteM, fill = MethodVote)) +
geom_col(position="fill") +
#coord_flip() +
facet_geo(~ state) +
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
labs(x="", y = "",
caption = "Voting by mail is more popular in the West Coast (Based on data from IPUMS)",
fill = 'Method of Voting',
title=paste("Most Popular Method of Voting in 2018"))
From this plot, it is shown that mainly only the west coast states voted by mail and everyone else prefers in person.
states <- usmap::us_map()
state18<-trim1618%>%
group_by(YEAR, State, Voted)%>%
summarise(nVote=n(),
nWgtVote=sum(VOSUPPWT, na.rm=TRUE))
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
state18T<-trim1618%>%
group_by(YEAR, State)%>%
summarise(n=n(),
nWgt=sum(VOSUPPWT, na.rm=TRUE))
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)
statePropVote<-state18%>%
filter(Voted=="Voted")%>%
left_join(state18T)%>%
mutate(sampPropVote=nVote/n,
wgtPropVote=nWgtVote/nWgt)
## Joining, by = c("YEAR", "State")
mapPropVote<-states%>%
mutate(State=full)%>%
left_join(statePropVote)
## Joining, by = "State"
p<-mapPropVote%>%
ggplot(aes(x, y, group = group)) +
geom_polygon(aes(text=State, fill = wgtPropVote),color="black")+
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+
labs(x="", y = "",
caption = "(Based on data from IPUMS)",
fill = 'Percent',
title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
facet_grid(.~YEAR)+
scale_fill_viridis(direction = -1)
## Warning: Ignoring unknown aesthetics: text
Voter turn out by state in the two different years
ggplotly(p)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
It shows that in 2018, the voting population in the more lighter states has gone down.
ggplot(trim1618, aes(VoteWhen, fill=RaceSimp))+
geom_bar(position = "fill")+
facet_grid(.~YEAR)
### Voter turn out vs Race
ggplot(trim1618, aes(Voted, fill=RaceSimp))+
geom_bar(position = "fill")+
facet_grid(.~YEAR)
From this graph, it is evident that people who are not white, dont vote as much. We should encourage these groups to vote more.
trim1618$EduSimp<-factor(trim1618$EduSimp, levels =c("No school", "Some school but no diploma", "High school graduate or GED", "Some college but no degree", "Associate degree", "Bachelors degree", "Masters degree", "Professional or Doctoral degree"))
ggplot(trim1618, aes(Voted, fill=EduSimp))+ geom_bar()+ facet_grid(.~YEAR)
From this graph we can see that people with higher education vote more. We can target people with lower education statuses to vote.
ggplot(trim1618, aes(VoteWhen, fill=EduSimp))+
geom_bar(position = "fill")+
facet_grid(.~YEAR)
We need to encourage more to vote before election day
###Voter turnout vs Age
ggplot(trim1618, aes(x=AGE, fill=Voted))+
geom_boxplot()
Older people tend to vote. We should be targeting colleges an encourage them to vote. One way to do this is to have a voting registration booth so students who are unregistered can register to vote.
ggplot(trim1618, aes(x=MartialSimp, fill=Voted))+
geom_bar()
` ###When people voted vs Age
ggplot(trim1618, aes(AGE, fill=VoteWhen))+
geom_bar(position = "fill")+
facet_grid(.~YEAR)
m2 <- glm(as.factor(Voted) ~ AGE+as.factor(SEX)+Metro+RaceSimp+
Vet+Citizen+Labor+EduSimp, data = trim1618, family = "binomial")
summary(m2)
##
## Call:
## glm(formula = as.factor(Voted) ~ AGE + as.factor(SEX) + Metro +
## RaceSimp + Vet + Citizen + Labor + EduSimp, family = "binomial",
## data = trim1618)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6034 -1.0212 0.5681 0.8140 2.2363
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.267712 0.179559 -18.199 < 2e-16
## AGE 0.031703 0.000388 81.699 < 2e-16
## as.factor(SEX)2 0.117764 0.012720 9.258 < 2e-16
## MetroCentral city status unknown -0.130950 0.019700 -6.647 2.99e-11
## MetroNot identifiable -0.105859 0.059319 -1.785 0.074330
## MetroNot in metro area -0.090859 0.019002 -4.782 1.74e-06
## MetroOutside central city 0.001972 0.016463 0.120 0.904652
## RaceSimpAsian or Pacific Islander -0.108298 0.058756 -1.843 0.065305
## RaceSimpBlack 0.738456 0.053357 13.840 < 2e-16
## RaceSimpMore than one race 0.338411 0.067257 5.032 4.86e-07
## RaceSimpWhite 0.437931 0.050100 8.741 < 2e-16
## VetYes 0.088528 0.023876 3.708 0.000209
## CitizenBorn in U.S 0.101335 0.064150 1.580 0.114183
## CitizenBorn in U.S. outlying -0.357707 0.100756 -3.550 0.000385
## CitizenNaturalized citizen -0.268012 0.067519 -3.969 7.21e-05
## LaborYes, in the labor force 0.307313 0.014359 21.402 < 2e-16
## EduSimpSome school but no diploma 0.540822 0.160149 3.377 0.000733
## EduSimpHigh school graduate or GED 1.309406 0.159446 8.212 < 2e-16
## EduSimpSome college but no degree 1.942360 0.159777 12.157 < 2e-16
## EduSimpAssociate degree 2.038980 0.160287 12.721 < 2e-16
## EduSimpBachelors degree 2.654500 0.159937 16.597 < 2e-16
## EduSimpMasters degree 2.967957 0.161445 18.384 < 2e-16
## EduSimpProfessional or Doctoral degree 3.078902 0.165914 18.557 < 2e-16
##
## (Intercept) ***
## AGE ***
## as.factor(SEX)2 ***
## MetroCentral city status unknown ***
## MetroNot identifiable .
## MetroNot in metro area ***
## MetroOutside central city
## RaceSimpAsian or Pacific Islander .
## RaceSimpBlack ***
## RaceSimpMore than one race ***
## RaceSimpWhite ***
## VetYes ***
## CitizenBorn in U.S
## CitizenBorn in U.S. outlying ***
## CitizenNaturalized citizen ***
## LaborYes, in the labor force ***
## EduSimpSome school but no diploma ***
## EduSimpHigh school graduate or GED ***
## EduSimpSome college but no degree ***
## EduSimpAssociate degree ***
## EduSimpBachelors degree ***
## EduSimpMasters degree ***
## EduSimpProfessional or Doctoral degree ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 186890 on 152823 degrees of freedom
## Residual deviance: 164164 on 152801 degrees of freedom
## AIC: 164210
##
## Number of Fisher Scoring iterations: 4
From this general linear model, we can see that almost all variables are significant.
methodT<-method%>%
group_by(YEAR, State, state)%>%
summarise(nVoteTot=sum(nVoteM),
wgtVoteTot=sum(nWgtVoteM))%>%
left_join(method)%>%
mutate(pctMethodVote=nVoteM/nVoteTot,
wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
mailIn<-methodT%>%
filter(MethodVote=="By mail")%>%
left_join(statePropVote)
## Joining, by = c("YEAR", "State")
p<-mailIn%>%
filter(YEAR==2016)%>%
ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
geom_point(aes(text=State), alpha=.7)+
geom_smooth(method="lm", se=FALSE)+
theme_minimal()+
labs(x="Percent Mail-in Votes (Weighted)", y = "Voter Turn-out (Weighted)",
caption = "(2016 Election 2016 Based on data from IPUMS)",
color = 'Population',
title="Mail-in Votes Across the States in 2016")+
scale_color_viridis(direction = -1)+
scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(p, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
### MAIL IN VOTES BY STATE in 2016
methodT<-method%>%
group_by(YEAR, State, state)%>%
summarise(nVoteTot=sum(nVoteM),
wgtVoteTot=sum(nWgtVoteM))%>%
left_join(method)%>%
mutate(pctMethodVote=nVoteM/nVoteTot,
wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
mailIn<-methodT%>%
filter(MethodVote=="By mail")%>%
left_join(statePropVote)
## Joining, by = c("YEAR", "State")
w<-mailIn%>%
filter(YEAR==2018)%>%
ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
geom_point(aes(text=State), alpha=.7)+
geom_smooth(method="lm", se=FALSE)+
theme_minimal()+
labs(x="Percent Mail-in Votes (Weighted)", y = "Voter Turn-out (Weighted)",
caption = "(2016 Election 2016 Based on data from IPUMS)",
color = 'Population',
title="Mail-in Votes Across the States in 2018")+
scale_color_viridis(direction = -1)+
scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(w, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
methodT<-method%>%
group_by(YEAR, State, state)%>%
summarise(nVoteTot=sum(nVoteM),
wgtVoteTot=sum(nWgtVoteM))%>%
left_join(method)%>%
mutate(pctMethodVote=nVoteM/nVoteTot,
wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
InPerson<-methodT%>%
filter(MethodVote=="In person")%>%
left_join(statePropVote)
## Joining, by = c("YEAR", "State")
q<-InPerson%>%
filter(YEAR==2016)%>%
ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
geom_point(aes(text=State), alpha=.7)+
geom_smooth(method="lm", se=FALSE)+
theme_minimal()+
labs(x="Percent In Person Votes (Weighted)", y = "Voter Turn-out (Weighted)",
caption = "(2016 Election 2016 Based on data from IPUMS)",
color = 'Population',
title="In Peson Votes in 2016")+
scale_color_viridis(direction = -1)+
scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(q, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
### In person voting by states in 2018
methodT<-method%>%
group_by(YEAR, State, state)%>%
summarise(nVoteTot=sum(nVoteM),
wgtVoteTot=sum(nWgtVoteM))%>%
left_join(method)%>%
mutate(pctMethodVote=nVoteM/nVoteTot,
wgtPctMethodVote=nWgtVoteM/wgtVoteTot)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)
## Joining, by = c("YEAR", "State", "state")
InPerson<-methodT%>%
filter(MethodVote=="In person")%>%
left_join(statePropVote)
## Joining, by = c("YEAR", "State")
g<-InPerson%>%
filter(YEAR==2018)%>%
ggplot(aes(x=wgtPctMethodVote, y=wgtPropVote, color=nWgt, size=nWgt))+
geom_point(aes(text=State), alpha=.7)+
geom_smooth(method="lm", se=FALSE)+
theme_minimal()+
labs(x="Percent In Person Votes (Weighted)", y = "Voter Turn-out (Weighted)",
caption = "(2016 Election 2016 Based on data from IPUMS)",
color = 'Population',
title="In Peson Votes in 2018")+
scale_color_viridis(direction = -1)+
scale_size(trans="sqrt", range=c(0.1, 7))
## Warning: Ignoring unknown aesthetics: text
ggplotly(g, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'