# Load the tidyverse
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Import the FiveThirtyEight new-voter-registration data.
# read.csv() is used (rather than readr) so the column names are made
# syntactic, e.g. "New.registered.voters", which the code below relies on.
vreg <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/voter-registration/new-voter-registrations.csv",
  header = TRUE
)

# Level the Month variable so that it's in calendar order (i.e. not
# alphabetical). Any month not listed in `levels` would become NA.
vreg$Month <- factor(
  vreg$Month,
  levels = c("Jan", "Feb", "Mar", "Apr", "May")
)

# Reshape to one row per Jurisdiction/Month with one column per year.
# pivot_wider() replaces the superseded spread(); names_prefix = "Y" produces
# the Y2016 / Y2020 column names directly, so the names no longer depend on
# column position the way a colnames()<- rename does.
# arrange() and as.data.frame() keep the row order and the plain data.frame
# class that spread() used to return.
vregYear <- vreg %>%
  pivot_wider(
    names_from = Year,
    values_from = New.registered.voters,
    names_prefix = "Y"
  ) %>%
  arrange(Jurisdiction, Month) %>%
  as.data.frame()

# Change in new registrations from 2016 to 2020 for each Jurisdiction/Month,
# plus a logical flag that is TRUE when registrations went up.
vregChange <- vregYear %>%
  mutate(
    change = Y2020 - Y2016,
    posorneg = change > 0
  )
#View(vregChange)
# Bar chart of the 2020 minus 2016 change in new registrations (in thousands),
# one panel per jurisdiction, with bars filled by the sign of the change.
ggplot(vregChange, aes(Month, change / 1000, fill = posorneg)) +
  # basic geometries
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(position = "dodge") +
  # each jurisdiction gets its own y range because the magnitudes differ a lot
  facet_wrap(~Jurisdiction, scales = "free_y") +
  # labels and scales
  scale_y_continuous(labels = scales::unit_format(unit = "K")) +
  # note -> I tried setting different breaks for the y scale, but since there
  # was such a large range across states I let R choose the breaks
  # automatically
  # only the first and last month are labelled to keep the small panels legible
  scale_x_discrete(labels = c("Jan.", "", "", "", "May")) +
  labs(
    title = "Voter registration dropped dramatically during the pandemic",
    subtitle = "Difference in the number of newly registered voters for each month in 2020 compared to the same month in 2016"
  ) +
  # theme
  # start from an empty theme and add back only the elements that are wanted
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, size = 9),
    strip.text = element_text(
      face = "bold",
      margin = margin(0.5, 0.1, 0.1, 0.15, "cm")
    ),
    panel.grid.major.y = element_line(linetype = "solid", color = "grey"),
    axis.text.y = element_text(color = "grey"),
    axis.text.x.bottom = element_text(color = "grey"),
    plot.margin = margin(0.7, 0.4, 0.1, 0.2, "cm"),
    # the fill only encodes the sign of the bar, so no legend is shown
    legend.position = "none"
  )
# Y-axis labelling helper: appends " K" (thousands) to every value.
#   x   - values to label (anything paste0() accepts).
#   ... - forwarded to format() for any additional formatting
#         (e.g. width, justify).
# Returns a character vector; format() pads the entries to a common width.
addK <- function(x, ...) {
  with_suffix <- paste0(x, " K")
  format(with_suffix, ...)
}
# Horizontal extent of each slope segment: 2016 is drawn at x = 0 and 2020 at
# x = years. No x-axis tick labels are shown, so the value only sets the
# segment width.
# NOTE(review): 2016 -> 2020 is four years, not three; the value is left
# unchanged here in case later code reads `years`.
years <- 3

# Slope graph: one segment per month running from the 2016 count to the 2020
# count (both in thousands), one panel per jurisdiction.
# The stray `color = Month` argument that used to be passed to ggplot() was
# outside aes() and silently ignored; the colour mapping lives in
# geom_segment() below.
ggplot(vregChange) +
  # basic geometries
  # `size` (not `linewidth`) matches the ggplot2 3.3.5 attached above
  geom_segment(
    aes(
      x = 0, xend = years,
      y = Y2016 / 1000, yend = Y2020 / 1000,
      colour = Month
    ),
    size = .9
  ) +
  facet_wrap(~Jurisdiction, scales = "free_y") +
  # labels and scale
  scale_y_continuous(labels = addK) +
  labs(
    x = "2016 versus 2020",
    y = "Voter Registration",
    title = "Monthly Voter Registration between 2016 and 2020",
    subtitle = "Has covid caused voter registration to drop?"
  ) +
  # theme
  # start from an empty theme and add back only the elements that are wanted
  theme_void() +
  theme(
    # more labels! also labelling-related settings
    plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 13),
    strip.text = element_text(
      face = "bold", size = 6,
      margin = margin(0.5, 0.1, 0.1, 0.15, "cm")
    ),
    panel.grid.major.y = element_line(linetype = "solid", color = "grey"),
    axis.title.x = element_text(
      size = 13, face = "bold",
      margin = margin(0.5, 0.5, 0.5, 0.5, "cm")
    ),
    axis.title.y = element_text(
      size = 13, face = "bold", angle = 90,
      margin = margin(0.5, 0.5, 0.5, 0.5, "cm")
    ),
    axis.text.y = element_text(color = "grey"),
    plot.margin = margin(0.7, 0.4, 0.3, 0.3, "cm"),
    # making the legend bigger and easier to read
    legend.text = element_text(size = 11, face = "bold"),
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(.9, "cm"),
    legend.key.width = unit(1.2, "cm"),
    legend.title = element_text(face = "bold", size = 14)
  )
Wahoo! Here’s my recreated graphic! I thought that showing only the change in voter registration numbers each month did communicate what the designer wanted (an overall drop in voter registration), but some states had increases in registration in some months, and I felt the registration numbers themselves shouldn’t be left out. I went with a slope graph so the viewer can see downward trends through the slope and can also see the registration numbers. This way the viewer can draw their own conclusions about how significantly COVID caused voter registrations to drop.
I also added guide lines for the y axis (panel.grid.major.y). I thought repeating the same scale on both sides would make the overall graphic too cluttered, even though showing numbers on both sides is very common for slope graphs.