This coding sample is adapted from a prelude to my senior honors thesis, “Riot in the Party: Voter Registrations in the Aftermath of the January 6, 2021 Capitol Insurrection” (working version from November linked here). For this section of my thesis, I aimed to examine the characteristics of Florida registered voters who were arrested for participating in the Capitol insurrection on January 6, 2021. To begin, I took the Florida voter file (available through the Florida Division of Elections) and a database from USA Today containing information on those arrested. Here is what both datasets look like:
Example of Florida Voter File Formatting
| CountyCode | NameLast | NameFirst | NameMiddle | VoterID |
|---|---|---|---|---|
| ALA | Smith | John | James | 100101101 |
| Gender | Race | BirthDate | RegistrationDate | PartyAffiliation |
|---|---|---|---|---|
| M | 5 | 1/1/1960 | 10/8/1980 | REP |
| Precinct | Vote2020Gen | Vote2020P | Vote2018Gen | Vote2016Gen | Vote2016P |
|---|---|---|---|---|---|
| 4 | 1 | NA | NA | 1 | 1 |
Example of Rioter Data Formatting
| Name | Age |
|---|---|
| Adam Christian Johnson | 36 |
Obviously, we need to do some work if we want to merge these datasets. Here is some code that creates an Age column from the BirthDate column in the voter file. We’ll also recode the Race column using the data manual and create an Age Category column for later analysis. Finally, we’ll convert the NAs in the Vote columns to 0s and combine the name fields in the voter data into a single Name string, and then match the two datasets by filtering the voter file on Name and Age:
# ---- Prepare the Florida voter file (FLVF2021) ----
# Adds Age, Agecat and Name, recodes Race, and zero-fills the turnout flags.

# BirthDate arrives as an m/d/Y string (e.g. "1/1/1960").
FLVF2021$BirthDate <- as.Date(FLVF2021$BirthDate, "%m/%d/%Y")

# Age in completed years as of 2021-01-01.
FLVF2021$Age <- trunc((FLVF2021$BirthDate %--% "2021-01-01") / years(1))

# Collapse the numeric race codes into four groups. Numeric values must be
# named with backticks; a bare `1 = "Other"` is not valid R syntax.
FLVF2021$Race <- recode(
  FLVF2021$Race,
  `1` = "Other", `2` = "Other", `3` = "Black", `4` = "Hispanic",
  `5` = "White", `6` = "Other", `7` = "Other", `9` = "Other"
)

# Age bands are left-closed: [18, 24), [24, 30), [30, 45), [45, 65), [65, 106).
# Any non-missing age outside those bands is labelled "Other"; a missing age
# stays NA.
age_breaks <- c(18, 24, 30, 45, 65, 106)
age_labels <- c("18-23", "24-29", "30-44", "45-64", "65-105")
agecat <- as.character(
  cut(FLVF2021$Age, breaks = age_breaks, labels = age_labels, right = FALSE)
)
agecat[is.na(agecat) & !is.na(FLVF2021$Age)] <- "Other"
FLVF2021$Agecat <- agecat

# A missing turnout flag is treated as "did not vote" (0).
vote_cols <- c("Vote2016P", "Vote2016Gen", "Vote2018Gen", "Vote2020P",
               "Vote2020Gen")
for (vote_col in vote_cols) {
  FLVF2021[[vote_col]][is.na(FLVF2021[[vote_col]])] <- 0
}

# Build "First Middle Last" as a single string. Missing name parts are dropped
# rather than pasted in as the literal text "NA", and repeated spaces are
# collapsed so a voter without a middle name becomes "First Last".
blank_if_missing <- function(part) {
  part <- trimws(as.character(part))
  part[is.na(part)] <- ""
  part
}
FLVF2021$Name <- trimws(gsub(
  " +", " ",
  paste(
    blank_if_missing(FLVF2021$NameFirst),
    blank_if_missing(FLVF2021$NameMiddle),
    blank_if_missing(FLVF2021$NameLast)
  )
))
# Keep the voters whose (Name, Age) pair appears in the rioter data. Name and
# Age are matched together: testing `Name %in% ...` and `Age %in% ...`
# separately would also keep a namesake of any age that happens to belong to
# some other rioter.
# NOTE(review): Age here is age on 2021-01-01, while rioters$Age is presumably
# age at arrest, so exact matching can miss someone whose birthday fell in
# between; confirm the matches by hand.
rioter_keys <- paste(rioters$Name, rioters$Age)
rioters2021 <- filter(FLVF2021, paste(Name, Age) %in% rioter_keys)
After confirming that we have found the correct people, we can begin with our analysis. I wanted to compare the demographics of the rioters with the rest of the Florida population. Here is how I coded those tables.
# ---- Demographic and turnout comparison table (demotablefull) ----
# Compares the rioters with all registered voters on party, gender, race, age
# band and turnout. Every figure is a percentage rounded to two decimals.

# Percentage distribution of one categorical vector, tagged with the category
# it belongs to. Returns columns Category, Var1 (the label) and Freq.
pct_table <- function(values, category) {
  pct <- round(prop.table(table(values)), 4) * 100
  data.frame(
    Category = rep(category, length(pct)),
    Var1 = as.character(names(pct)),
    Freq = as.numeric(pct),
    stringsAsFactors = FALSE
  )
}

# Display category -> voter-file column.
demo_columns <- c(Party = "PartyAffiliation", Gender = "Gender",
                  Race = "Race", Age = "Agecat")

# Stack the percentage distributions of all demographic columns of `voters`.
demo_pct <- function(voters) {
  pieces <- lapply(names(demo_columns), function(category) {
    pct_table(voters[[demo_columns[[category]]]], category)
  })
  do.call(rbind, pieces)
}

riottable <- demo_pct(rioters2021)
regtable <- demo_pct(FLVF2021)

# Join on Category as well as the label: a label such as "Other" occurs in more
# than one category (party and race), and joining on the label alone would
# cross-match those rows and require deleting the spurious ones by position.
demotable <- left_join(riottable, regtable, by = c("Category", "Var1"))

# Turnout is measured only among people registered by each election's cutoff.
# RegistrationDate is coerced to Date first, because comparing an m/d/Y string
# with an ISO date string would be a meaningless alphabetical comparison.
as_reg_date <- function(x) {
  if (inherits(x, "Date")) x else as.Date(as.character(x), "%m/%d/%Y")
}
registered_by <- function(voters, cutoff) {
  filter(voters, as_reg_date(RegistrationDate) <= as.Date(cutoff))
}

riot2020 <- registered_by(rioters2021, "2020-10-05")
reg2020 <- registered_by(FLVF2021, "2020-10-05")
riot2020p <- registered_by(rioters2021, "2020-02-17")
reg2020p <- registered_by(FLVF2021, "2020-02-17")
riot2018 <- registered_by(rioters2021, "2018-10-09")
reg2018 <- registered_by(FLVF2021, "2018-10-09")
riot2016 <- registered_by(rioters2021, "2016-10-18")
reg2016 <- registered_by(FLVF2021, "2016-10-18")

# Share (in percent) of turnout flags equal to 1. Computed directly so that an
# election in which nobody in the group voted yields 0 instead of a missing row.
turnout_pct <- function(votes) {
  round(mean(votes == 1, na.rm = TRUE), 4) * 100
}

votetable <- data.frame(
  Category = "Turnout",
  Var1 = c("2020 General", "2020 Primary", "2018 General", "2016 General"),
  Freq.x = c(
    turnout_pct(riot2020$Vote2020Gen),
    turnout_pct(riot2020p$Vote2020P),
    turnout_pct(riot2018$Vote2018Gen),
    turnout_pct(riot2016$Vote2016Gen)
  ),
  Freq.y = c(
    turnout_pct(reg2020$Vote2020Gen),
    turnout_pct(reg2020p$Vote2020P),
    turnout_pct(reg2018$Vote2018Gen),
    turnout_pct(reg2016$Vote2016Gen)
  ),
  stringsAsFactors = FALSE
)

demotablefull <- rbind(demotable, votetable)
names(demotablefull) <- c("Category", "Descriptor", "% Rioters",
                          "% All Registered Voters")

# Show each category name on its first row only; later rows get a blank cell.
demotablefull$Category[duplicated(demotablefull$Category)] <- " "

demotablefull$Descriptor <- recode(
  demotablefull$Descriptor,
  "DEM" = "Democrat", "REP" = "Republican",
  "F" = "Female", "M" = "Male", "U" = "Unspecified"
)

# Percentage-point gap between rioters and all registered voters.
demotablefull$`% Difference` <-
  demotablefull$`% Rioters` - demotablefull$`% All Registered Voters`
knitr::kable(demotablefull, row.names = F)
| Category | Descriptor | % Rioters | % All Registered Voters | % Difference |
|---|---|---|---|---|
| Party | Democrat | 1.30 | 36.50 | -35.20 |
| | NPA | 9.09 | 26.50 | -17.41 |
| | Other | 3.90 | 1.58 | 2.32 |
| | Republican | 85.71 | 35.42 | 50.29 |
| Gender | Female | 10.39 | 52.00 | -41.61 |
| | Male | 85.71 | 45.03 | 40.68 |
| | Unspecified | 3.90 | 2.97 | 0.93 |
| Race | Black | 1.30 | 13.51 | -12.21 |
| | Hispanic | 10.39 | 17.41 | -7.02 |
| | Other | 5.19 | 7.89 | -2.70 |
| | White | 83.12 | 61.18 | 21.94 |
| Age | 18-23 | 5.19 | 7.88 | -2.69 |
| | 24-29 | 11.69 | 9.33 | 2.36 |
| | 30-44 | 44.16 | 22.79 | 21.37 |
| | 45-64 | 33.77 | 32.71 | 1.06 |
| | 65-105 | 5.19 | 27.29 | -22.10 |
| Turnout | 2020 General | 96.05 | 74.59 | 21.46 |
| | 2020 Primary | 32.84 | 21.31 | 11.53 |
| | 2018 General | 84.13 | 61.46 | 22.67 |
| | 2016 General | 91.23 | 75.33 | 15.90 |
This code binds together two tables that display the party, gender, race, and age distributions of the rioters versus the same distributions for the general Florida voter file. For the turnout rates, I first had to filter the rioter data and the Florida voter file to include only those who were eligible to vote in the election under consideration; in other words, to include only those who had registered by the registration deadline for that election (normally 29 days before election day in Florida). Then, I ran similar code to bind these tables, calculated the difference in the distributions in a new column, and finally altered the labels and layout before the table was complete.
While this table tells us some remarkable things about the rioters’ characteristics (they are overwhelmingly Republican, white, and male, as well as high-propensity voters), I further wanted to determine if these rioters came from counties in Florida that overwhelmingly supported Trump in the 2020 General Election. I downloaded the 2020 Election Results by County in Florida from the Florida Division of Elections. Then, I read this data into R and created a map displaying Trump’s share of the two-party (Trump plus Biden) vote in each county (shaded in red) versus the number of rioters that were from each county (size of the circle). If no circle is displayed, then there were no rioters arrested from that county. Clicking on a county will produce a pop-up displaying both of these statistics.
# ---- County-level two-party vote share, 2020 presidential race ----
# Produces `trumpvotes`: one row per Trump result row, with VotePercent equal to
# Trump's share of the combined Trump + Biden vote in that county.

electionresults <- read.delim(
  "/Users/saraloving/Downloads/11032020Election.txt",
  header = TRUE
)

# Keep the presidential race ("PRE") and the two major candidates only.
# NOTE(review): the surnames are matched against CanNameFirst; confirm that
# this column really holds "Trump"/"Biden" in the downloaded file.
electionresults <- filter(
  electionresults,
  RaceCode == "PRE",
  CanNameFirst %in% c("Trump", "Biden")
)

# Combined Trump + Biden votes per county; aggregate() names the sum `x` and
# the grouping column `Group.1`.
votetotals <- aggregate(
  x = electionresults$CanVotes,
  by = list(electionresults$CountyCode),
  FUN = sum
)
electionresults <- left_join(
  electionresults, votetotals,
  by = c("CountyCode" = "Group.1")
)

# Each candidate's share of the two-party vote, as a percentage with two
# decimals.
electionresults$VotePercent <-
  round(electionresults$CanVotes / electionresults$x, 4) * 100

trumpvotes <- filter(electionresults, CanNameFirst == "Trump")

# Respell "Desoto" as "DeSoto"; CountyName is later merged against the
# shapefile's NAME column.
trumpvotes$CountyName <- recode(trumpvotes$CountyName, "Desoto" = "DeSoto")
# Bin Trump's vote share into ordered 10-point bands for the map legend.
# Bands are right-closed, (40, 50], (50, 60], ..., with exactly 30 included in
# the first band and everything above 80 in the last. A share below 30 becomes
# NA instead of falling through to "80% or greater".
trumpvotes$`Trump Support in 2020 Election` <- cut(
  trumpvotes$VotePercent,
  breaks = c(30, 40, 50, 60, 70, 80, Inf),
  labels = c("30% - 40%", "40% - 50%", "50% - 60%", "60% - 70%",
             "70% - 80%", "80% or greater"),
  include.lowest = TRUE
)
# ---- Interactive map: Trump support vs. number of rioters per county ----

# Rioters per county code; codes are kept as character (not factor) so they
# join cleanly against the election results.
rioterscounty <- as.data.frame(
  table(rioters2021$CountyCode),
  stringsAsFactors = FALSE
)
mapdata <- left_join(trumpvotes, rioterscounty, by = c("CountyCode" = "Var1"))

# Counties with no matched rioter have no row in the count table.
mapdata$Freq[is.na(mapdata$Freq)] <- 0
mapdata$`Number of Rioters` <- mapdata$Freq

flmap <- readOGR(
  "/Users/saraloving/Downloads/florida_shapefile.shp",
  verbose = FALSE
)

# Registers a character -> "num.with.commas" coercion that strips thousands
# separators. Nothing in this section uses it.
# NOTE(review): presumably intended for `colClasses` when reading a file;
# confirm it is still needed.
setClass("num.with.commas")
setAs("character", "num.with.commas",
      function(from) as.numeric(gsub(",", "", from)))

# Attach the county statistics to the shapes by county name.
flmap <- merge(flmap, mapdata, by.x = "NAME", by.y = "CountyName")

my.palette <- brewer.pal(n = 7, name = "Reds")

tmap_mode("view")
tm_shape(flmap) +
  tm_polygons(
    col = "Trump Support in 2020 Election",
    palette = my.palette,
    popup.vars = c("Trump Support in 2020 Election", "Number of Rioters")
  ) +
  tm_dots(
    size = "Number of Rioters",
    shape = 1,
    scale = .5,
    col = NULL,
    border.col = 1,
    popup.vars = NULL
  ) +
  tm_layout(
    main.title = "Trump Support in 2020 Election",
    title.size = 0.5,
    legend.outside = TRUE
  )