I uploaded the data to my GitHub:
library(tidyverse)
### Combined survey extract: the 2016 and 2018 datasets stacked in one CSV,
### read from a pinned commit of the GitHub repository.
data1618 <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_2016_2018.csv",
  header = TRUE
)
# dim(data1618)
# names(data1618)

## Data dictionary, read from the same commit. Text columns are kept as
## character vectors instead of being converted to factors.
dd <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/fallChallenge2020/ff4d4795566a553cade80ca6e6fe15ea69ee6e1b/data/data_dictionary.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
Label data frames:
## Build the code -> label lookup table for one variable of the data
## dictionary.
##
## Args:
##   dict:      the data dictionary (`dd`). Its `variable` column says which
##              variable a row belongs to; columns 2 and 3 are taken as the
##              code and its label.
##   var:       value of `variable` to extract. It is also used as the name of
##              the code column, so the result joins to the survey data on
##              the column called `var`.
##   label_col: name given to the label column.
##
## Returns:
##   A two-column data frame named (`var`, `label_col`). The first matching
##   dictionary row is dropped -- presumably it describes the variable itself
##   rather than one of its codes (confirm against the dictionary file).
code_labels <- function(dict, var, label_col) {
  # which() discards NA comparisons, so rows whose `variable` is missing are
  # never selected.
  keep <- which(dict[["variable"]] == var)
  lookup <- dict[keep, 2:3, drop = FALSE][-1, , drop = FALSE]
  colnames(lookup) <- c(var, label_col)
  rownames(lookup) <- NULL
  lookup
}

## STATE FIP CODES
STATEFIP <- code_labels(dd, "STATEFIP", "State")

## METRO CODES
METRO <- code_labels(dd, "METRO", "Metro")

## RACE CODES
RACE <- code_labels(dd, "RACE", "Race")
#### We might prefer to use the simplified RACE variable
RACESIMP <- code_labels(dd, "RACESIMPLE", "RaceSimp")

## Marital status (MARST) CODES
## (the label columns keep their existing names, "Martial" / "MartialSimp")
MARST <- code_labels(dd, "MARST", "Martial")
#### We might prefer to use the simplified MARST variable
MARRSIMP <- code_labels(dd, "MARRSIMPLE", "MartialSimp")

## VETSTAT CODES
VETSTAT <- code_labels(dd, "VETSTAT", "Vet")

## CITIZEN CODES
CITIZEN <- code_labels(dd, "CITIZEN", "Citizen")

## HISPAN CODES
HISPAN <- code_labels(dd, "HISPAN", "Hispanic")
#### We might prefer to use the simplified HISPAN variable
HISPSIMP <- code_labels(dd, "HISPSIMPLE", "HispanSimp")

## LABFORCE CODES
LABFORCE <- code_labels(dd, "LABFORCE", "Labor")

## EDUC99 CODES (Education Attainment)
EDUC99 <- code_labels(dd, "EDUC99", "Edu1990")

## EDCYC CODES (Years of college credit)
EDCYC <- code_labels(dd, "EDCYC", "College")

## EDDIPGED CODES (Highschool or GED)
EDDIPGED <- code_labels(dd, "EDDIPGED", "HighGED")

## EDHGCGED CODES (Highest grade before GED)
EDHGCGED <- code_labels(dd, "EDHGCGED", "HighestGrade")

#### We might prefer to use the simplified EDU variable
EDUSIMPLE <- code_labels(dd, "EDUSIMPLE", "EduSimp")

## SCHLCOLL CODES (School or college attendance)
SCHLCOLL <- code_labels(dd, "SCHLCOLL", "SchoolAttend")

## Reason why eligible voter did not vote
VOWHYNOT <- code_labels(dd, "VOWHYNOT", "WhyNotVote")
# Replace the label in the 8th row (positional index) of the lookup table.
# NOTE(review): the row is hard-coded; re-check it if the dictionary changes.
VOWHYNOT[8, 2] <- "Registration Problems"

## Reason why eligible voter did not register to vote
VOYNOTREG <- code_labels(dd, "VOYNOTREG", "WhyNotReg")

## Method of voting in the most recent November election
VOTEHOW <- code_labels(dd, "VOTEHOW", "MethodVote")

## Voted on or before election day
VOTEWHEN <- code_labels(dd, "VOTEWHEN", "VoteWhen")

## Method of registering to vote
VOREGHOW <- code_labels(dd, "VOREGHOW", "MethodReg")

## Voted for the most recent November election
VOTED <- code_labels(dd, "VOTED", "Voted")

## Registered for the most recent November election
VOREG <- code_labels(dd, "VOREG", "Registered")
##### Select columns and join for labels
## Keep only the analysis variables, then attach the human-readable label for
## every coded column from its lookup table. Each join names its key
## explicitly, so a column name that happens to be shared between the data and
## a lookup table can never silently become part of the join key (and dplyr no
## longer prints a "Joining, by = ..." message for every step).
trim1618 <- data1618 %>%
  select(
    YEAR, STATEFIP, METRO, AGE, SEX,
    RACESIMPLE, MARRSIMPLE, VETSTAT, CITIZEN,
    HISPSIMPLE, LABFORCE, EDUSIMPLE, SCHLCOLL,
    VOWHYNOT, VOYNOTREG, VOTEHOW, VOTEWHEN,
    VOREGHOW, VOTED, VOREG, VOSUPPWT
  ) %>%
  left_join(STATEFIP, by = "STATEFIP") %>%
  left_join(METRO, by = "METRO") %>%
  left_join(RACESIMP, by = "RACESIMPLE") %>%
  left_join(MARRSIMP, by = "MARRSIMPLE") %>%
  left_join(VETSTAT, by = "VETSTAT") %>%
  left_join(CITIZEN, by = "CITIZEN") %>%
  left_join(HISPSIMP, by = "HISPSIMPLE") %>%
  left_join(LABFORCE, by = "LABFORCE") %>%
  left_join(EDUSIMPLE, by = "EDUSIMPLE") %>%
  left_join(SCHLCOLL, by = "SCHLCOLL") %>%
  left_join(VOWHYNOT, by = "VOWHYNOT") %>%
  left_join(VOYNOTREG, by = "VOYNOTREG") %>%
  left_join(VOTEHOW, by = "VOTEHOW") %>%
  left_join(VOTEWHEN, by = "VOTEWHEN") %>%
  left_join(VOREGHOW, by = "VOREGHOW") %>%
  left_join(VOTED, by = "VOTED") %>%
  left_join(VOREG, by = "VOREG")
# RACE and VOTED
# Racial composition of each voting-status group as proportions
# (bars rescaled to the same height), one panel per survey year.
ggplot(data = trim1618, mapping = aes(x = Voted, fill = RaceSimp)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(YEAR))

# The same breakdown as stacked respondent counts.
ggplot(data = trim1618, mapping = aes(x = Voted, fill = RaceSimp)) +
  geom_bar() +
  facet_grid(cols = vars(YEAR))
# The higher the education the more likely to vote
# One panel per EDUSIMPLE code; within a panel, one bar per year split by
# voting status and rescaled to proportions. The panels use the numeric code
# rather than the joined EduSimp label.
ggplot(data = trim1618, mapping = aes(x = YEAR, fill = Voted)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(EDUSIMPLE))

## MARRIED
# more likely to vote if married
# Same layout, with one panel per MARRSIMPLE code.
ggplot(data = trim1618, mapping = aes(x = YEAR, fill = Voted)) +
  geom_bar(position = "fill") +
  facet_grid(cols = vars(MARRSIMPLE))
## REASONS FOR NOT VOTING
# Count respondents per year and stated reason, leaving out the "NIU" label
# (rows with a missing reason are dropped by the comparison as well).
sumWhy <- trim1618 %>%
  filter(WhyNotVote != "NIU") %>%
  group_by(YEAR, WhyNotVote) %>%
  summarise(n = n())
## `summarise()` regrouping output by 'YEAR' (override with `.groups` argument)

# Horizontal bars ordered by count, with the two years drawn side by side.
# Disabled alternatives kept from the original:
#   facet_grid(.~YEAR)
#   theme(legend.position = "none")
ggplot(
  data = sumWhy,
  mapping = aes(x = reorder(WhyNotVote, n), y = n, fill = as.factor(YEAR))
) +
  geom_col(position = "dodge2") +
  coord_flip() +
  labs(title = "Why don't people vote?") +
  theme(
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank()
  )
#install.packages("usmap")
library(usmap)
## Warning: package 'usmap' was built under R version 3.6.2
#install.packages("plotly")
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#install.packages("viridis")
library(viridis)
## Loading required package: viridisLite
## State outlines used for the maps below
states <- usmap::us_map()

## Respondents per year, state and voting status: raw count and sum of the
## VOSUPPWT weights. `.groups = "drop_last"` states the grouping summarise()
## would otherwise pick by default (result stays grouped by YEAR, State) and
## silences its "regrouping output" message.
state18 <- trim1618 %>%
  group_by(YEAR, State, Voted) %>%
  summarise(
    nVote = n(),
    nWgtVote = sum(VOSUPPWT, na.rm = TRUE),
    .groups = "drop_last"
  )
# head(state18)

## Totals per year and state, used as the denominators (grouped by YEAR).
## NOTE(review): these totals count every respondent in the state whatever
## their `Voted` value -- confirm that this is the intended base for turn-out.
state18T <- trim1618 %>%
  group_by(YEAR, State) %>%
  summarise(
    n = n(),
    nWgt = sum(VOSUPPWT, na.rm = TRUE),
    .groups = "drop_last"
  )

## Share of respondents labelled "Voted", unweighted (sampPropVote) and
## weighted (wgtPropVote). The join key is explicit instead of being guessed
## from the shared column names.
statePropVote <- state18 %>%
  filter(Voted == "Voted") %>%
  left_join(state18T, by = c("YEAR", "State")) %>%
  mutate(
    sampPropVote = nVote / n,
    wgtPropVote = nWgtVote / nWgt
  )
# hist(statePropVote$wgtPropVote)

## Attach the turn-out figures to the state outlines; `full` holds the state
## name in the usmap data and is copied to `State` to serve as the join key.
## Each outline row is repeated once per year present in statePropVote.
mapPropVote <- states %>%
  mutate(State = full) %>%
  left_join(statePropVote, by = "State")
## Draw a choropleth of weighted voter turn-out by state.
##
## Args:
##   map_data: state outlines joined with turn-out figures, shaped like
##             `mapPropVote` (uses columns x, y, group, State, wgtPropVote
##             and, when `by_year` is TRUE, YEAR).
##   title:    plot title.
##   option:   colour map passed to scale_fill_viridis(); "D" is that
##             function's own default.
##   by_year:  if TRUE, draw one panel per YEAR so that all years share a
##             single colour scale.
##
## Returns:
##   A ggplot object; pass it to ggplotly() for the interactive version.
turnout_map <- function(map_data, title, option = "D", by_year = FALSE) {
  # wgtPropVote is a proportion (0-1); the legend is titled "Percent", so the
  # colour-bar labels are formatted as percentages to match.
  as_percent <- function(p) {
    ifelse(is.na(p), NA_character_, paste0(round(100 * p), "%"))
  }

  map <- ggplot(map_data, aes(x, y, group = group)) +
    # `text` is not a geom_polygon aesthetic, so ggplot2 warns
    # "Ignoring unknown aesthetics: text"; it is kept on purpose because
    # ggplotly() uses it for the hover tooltip.
    geom_polygon(aes(text = State, fill = wgtPropVote), color = "black") +
    theme_bw() +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ) +
    labs(
      x = "", y = "",
      caption = "(Based on data from IPUMS)",
      fill = "Percent",
      title = title
    ) +
    scale_fill_viridis(option = option, direction = -1, labels = as_percent)

  if (by_year) {
    map <- map + facet_grid(. ~ YEAR)
  }
  map
}

## Single year
this.year <- 2016
p <- mapPropVote %>%
  filter(YEAR == this.year) %>%
  # paste() already inserts one space between its arguments
  turnout_map(title = paste(this.year, "Voter Turn-out"), option = "magma")
ggplotly(p)

# Facet to maintain comparisons
p <- turnout_map(
  mapPropVote,
  title = "Voter Turn-out is Higher in Presidential Election Years",
  option = "magma",
  by_year = TRUE
)
ggplotly(p)

### Slightly different colors (default viridis colour map)
p <- turnout_map(
  mapPropVote,
  title = "Voter Turn-out is Higher in Presidential Election Years",
  by_year = TRUE
)
ggplotly(p)
## Respondents per year, state and voting method (raw count and sum of the
## VOSUPPWT weights), leaving out the listed non-answers. A lower-case copy
## of the state column, `state`, is added alongside `State`.
method <- trim1618 %>%
  filter(!(MethodVote %in% c("Don't know", "NIU", "Refused", "No Response"))) %>%
  group_by(YEAR, State, MethodVote) %>%
  summarise(
    nVoteM = n(),
    nWgtVoteM = sum(VOSUPPWT, na.rm = TRUE)
  ) %>%
  mutate(state = State)
## `summarise()` regrouping output by 'YEAR', 'State' (override with `.groups` argument)

# Voting methods that remain after the filter
unique(method$MethodVote)
## [1] "By mail" "In person"
#install.packages("fivethirtyeight")
#install.packages("gridExtra")
library(fivethirtyeight)
## Warning: package 'fivethirtyeight' was built under R version 3.6.2
## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#install.packages("geofacet")
library(geofacet)
## Warning: package 'geofacet' was built under R version 3.6.2
## 2016 only: one small panel per state, laid out geographically by
## facet_geo(). Each panel is a single bar split by voting method and rescaled
## to proportions of the weighted totals (nWgtVoteM).
# Disabled alternative kept from the original: coord_flip()
method %>%
  filter(YEAR == 2016) %>%
  ggplot(mapping = aes(x = 1, y = nWgtVoteM, fill = MethodVote)) +
  geom_col(position = "fill") +
  facet_geo(~state) +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(
    title = "Most Popular Method of Voting in 2016",
    fill = "Method of Voting",
    caption = "Voting by mail is more popular in the West Coast (Based on data from IPUMS)",
    x = "", y = ""
  )