So, I think I want to really challenge myself with the project and thus am going to use the US data about presidential elections. This data is in a 502 KB csv which details how states voted in every presidential election from 1976. I am interested in how states became more red or more blue over time in presidential elections. The data is MIT Election Data and Science Lab, 2017, “U.S. President 1976–2020”.
First I need to input the data:
presidential_data <- read.csv("C:/Users/joshu/Downloads/1976-2020-president (2).csv")
And get a summary
#summary(presidential_data)
#head(presidential_data)
First I think I want to add a column that will tell me what percentage of the vote went to each candidate from each state each year. I just did this in excel because it was easier.
Now there are a several ways that I could make interesting graphs, but the first one that comes to mind is a map of the united state and it shows how each state has changed overtime with regards to how its people vote in elections.
To start I need to install a package that has a map of the united states:
#install.packages("usmap")
library(usmap)
library(ggplot2)
plot_usmap(regions = "states")
After doing so research online, I think I can do this more directly with
ggplot. And I found a way to color a state:
I think case I have colored my home state Illinois Blue and the rest of the states lightgrey. I found that I can use this map data and add a column for the fill of each state.
usa <- map_data('usa')
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region == "illinois", "blue", "lightgrey")
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill),
color = "white", size = 0.25) +
scale_fill_identity()
Another experiment to get multiple states blue.
usa <- map_data('usa')
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% c("illinois", "new york"), "blue", "lightgrey")
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill),
color = "white", size = 0.25) +
scale_fill_identity()
Okay now we are getting somewhere. Now I can color states according to a
list. So for example I want to color all the states that voted for the
democratic president in 2020 blue.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
presidential_data_2020 <- filter(presidential_data, year == 2020)
max_percent_rows <- presidential_data_2020 %>%
group_by(state) %>%
filter(Percent == max(Percent))
#max_percent_rows
Blue_state <- c()
for(i in 1:51){
if (max_percent_rows$party_simplified[i] == "DEMOCRAT"){
Blue_state[i] <- max_percent_rows$state[i]
}
}
Blue_states <- na.omit(Blue_state)
Blue_states <- lapply(Blue_states, tolower)
Now that I have filtered out all the states that voted for the Democrat in 2020, I want to show them on the map in blue. I will make the default for the other states red.
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue", "Red")
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill),
color = "white", size = 0.25) +
scale_fill_identity()
Okay, we have a good start here. I am going to make one change to the
for loop and the map:
Blue_state <- c()
Red_state <- c()
for(i in 1:51){
if (max_percent_rows$party_simplified[i] == "DEMOCRAT"){
Blue_state[i] <- max_percent_rows$state[i]
}
else if (max_percent_rows$party_simplified[i] == "REPUBLICAN"){
Red_state[i] <- max_percent_rows$state[i]
}
}
Blue_states <- na.omit(Blue_state)
Blue_states <- lapply(Blue_states, tolower)
Red_states <- na.omit(Red_state)
Red_states <- lapply(Red_states, tolower)
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill),
color = "white", size = 0.25) +
scale_fill_identity()
While this creates the same map, being colored red is not just a default but comes from specifically having the state vote for a republican for president. We can quickly polish this map:
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill),
color = "white", size = 0.25) +
scale_fill_manual(values = c("blue" = "blue", "red" = "red", "purple" = "purple"),
labels = c("Democrat", "Republican", "Other"),
drop = FALSE) +
labs(title = "How States Voted in the 2020 Presidential Election") +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.title = element_text(),
legend.position = "top")+
guides(fill = guide_legend(title = "Party"))
Now I want to add a gradient based on how close the race was. So lighter
colors would mean it was closer and darker would be more solidly won by
that party.
states_and_percentages <- data.frame(matrix(NA,nrow = 51,ncol = 0))
states_and_percentages$region <- tolower(max_percent_rows$state)
states_and_percentages$percent <- max_percent_rows$Percent
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
map_data <- left_join(map_data,states_and_percentages,"region")
ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill,alpha = percent),
color = "white", size = 0.25) +
scale_fill_manual(values = c("blue" = "blue", "red" = "red", "purple" = "purple"),
labels = c("Democrat", "Republican", "Other"),
drop = FALSE) +
labs(title = "How States Voted in the 2020 Presidential Election") +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none")+
guides(fill = guide_legend(title = "Party"))
So here I like the alpha level the best to display how high of a
percentage each state voted for their candidate. I think the last thing
that I want to do is have this run for each year since 1976 up through
2020.
First I want to put all the code necessary for this graph together and then bind it up with a for loop:
y <- unique(presidential_data$year)
for(i in 1:12){
presidential_data_new <- filter(presidential_data, year == y[i])
max_percent_rows <- presidential_data_new %>%
group_by(state) %>%
filter(Percent == max(Percent))
Blue_state <- c()
Red_state <- c()
for(j in 1:51){
if (max_percent_rows$party_simplified[j] == "DEMOCRAT"){
Blue_state[j] <- max_percent_rows$state[j]
}
else if (max_percent_rows$party_simplified[j] == "REPUBLICAN"){
Red_state[j] <- max_percent_rows$state[j]
}
}
Blue_states <- na.omit(Blue_state)
Blue_states <- lapply(Blue_states, tolower)
Red_states <- na.omit(Red_state)
Red_states <- lapply(Red_states, tolower)
states_and_percentages <- data.frame(matrix(NA,nrow = 51,ncol = 0))
states_and_percentages$region <- tolower(max_percent_rows$state)
states_and_percentages$percent <- max_percent_rows$Percent
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
map_data <- left_join(map_data,states_and_percentages,"region")
current_year <- y[i]
title <- paste("How the States Voted in the", current_year, "Presidential Election")
graph <- ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill,alpha = percent),
color = "white", size = 0.25) +
scale_fill_manual(values = c("blue" = "blue", "red" = "red", "purple" = "purple"),
labels = c("Democrat", "Republican", "Other"),
drop = FALSE) +
labs(title = title) +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none")+
guides(fill = guide_legend(title = "Party"))
print(graph)
}
Okay after working with my for loop for a very long time I was able to
get it to work. I also adding an updating title. So I have a thought to
make this an animation or show all 12 maps on a 3x4 grid. I am not sure
which one will work better so I might just try both.
I will start with the grid. After some online research I found I library that will be helpful for setting up this grid arrangment that I want:
library(gridExtra)
graph_list <- list()
for(i in 1:12){
presidential_data_new <- filter(presidential_data, year == y[i])
max_percent_rows <- presidential_data_new %>%
group_by(state) %>%
filter(Percent == max(Percent))
Blue_state <- c()
Red_state <- c()
for(j in 1:51){
if (max_percent_rows$party_simplified[j] == "DEMOCRAT"){
Blue_state[j] <- max_percent_rows$state[j]
}
else if (max_percent_rows$party_simplified[j] == "REPUBLICAN"){
Red_state[j] <- max_percent_rows$state[j]
}
}
Blue_states <- na.omit(Blue_state)
Blue_states <- lapply(Blue_states, tolower)
Red_states <- na.omit(Red_state)
Red_states <- lapply(Red_states, tolower)
states_and_percentages <- data.frame(matrix(NA,nrow = 51,ncol = 0))
states_and_percentages$region <- tolower(max_percent_rows$state)
states_and_percentages$percent <- max_percent_rows$Percent
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
map_data <- left_join(map_data,states_and_percentages,"region")
current_year <- y[i]
title <- paste(current_year)
graph <- ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill,alpha = percent),
color = "white", size = 0.25) +
scale_fill_manual(values = c("blue" = "blue", "red" = "red", "purple" = "purple"),
labels = c("Democrat", "Republican", "Other"),
drop = FALSE) +
labs(title = title) +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none")+
guides(fill = guide_legend(title = "Party"))
graph_list[[i]] <- graph
}
graphs_arranged <- grid.arrange(
grobs = graph_list,
nrow = 3,
ncol = 4
)
# Display the grid of graphs
graphs_arranged
## TableGrob (3 x 4) "arrange": 12 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (2-2,1-1) arrange gtable[layout]
## 6 6 (2-2,2-2) arrange gtable[layout]
## 7 7 (2-2,3-3) arrange gtable[layout]
## 8 8 (2-2,4-4) arrange gtable[layout]
## 9 9 (3-3,1-1) arrange gtable[layout]
## 10 10 (3-3,2-2) arrange gtable[layout]
## 11 11 (3-3,3-3) arrange gtable[layout]
## 12 12 (3-3,4-4) arrange gtable[layout]
Okay now that is pretty amazing. Time for a gif and animation. I have never used gganimate before but we are going to try it to see if it works:
library(gganimate)
animation_list <- list()
for(i in 1:12){
presidential_data_new <- filter(presidential_data, year == y[i])
max_percent_rows <- presidential_data_new %>%
group_by(state) %>%
filter(Percent == max(Percent))
Blue_state <- c()
Red_state <- c()
for(j in 1:51){
if (max_percent_rows$party_simplified[j] == "DEMOCRAT"){
Blue_state[j] <- max_percent_rows$state[j]
}
else if (max_percent_rows$party_simplified[j] == "REPUBLICAN"){
Red_state[j] <- max_percent_rows$state[j]
}
}
Blue_states <- na.omit(Blue_state)
Blue_states <- lapply(Blue_states, tolower)
Red_states <- na.omit(Red_state)
Red_states <- lapply(Red_states, tolower)
states_and_percentages <- data.frame(matrix(NA,nrow = 51,ncol = 0))
states_and_percentages$region <- tolower(max_percent_rows$state)
states_and_percentages$percent <- max_percent_rows$Percent
map_data <- data.frame(map_data("state"))
map_data$fill <- ifelse(map_data$region %in% Blue_states, "blue",
ifelse(map_data$region %in% Red_states, "red", "purple"))
map_data <- left_join(map_data,states_and_percentages,"region")
current_year <- y[i]
title <- paste(current_year)
graph <- ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = fill,alpha = percent),
color = "white", size = 0.25) +
scale_fill_manual(values = c("blue" = "blue", "red" = "red", "purple" = "purple"),
labels = c("Democrat", "Republican", "Other"),
drop = FALSE) +
labs(title = title) +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none")+
guides(fill = guide_legend(title = "Party"))
animation_list[i] <- graph}
animation_data_frame <- do.call(rbind, animation_list)
animation_data_frame2<- dplyr::bind_rows(animation_data_frame, .id = "frame")
#animated_plot <- ggplot(animation_data_frame2, aes(x = 1, y = 1)) +
# geom_blank() +
#transition_manual(id = seq_len(nrow(plot_data))) +
#enter_grow() +
#exit_shrink() +
#labs(title = "Animated Plot")
Okay after working on this for a long time, I was unable to get a animation looking how I envisioned. So I think I am going to stick with the grid of the 12 maps.
There are a lot of other ideas I think I could do for this such as how third party candidates have fairer over the years and over different states. I think this map would not be difficult to make. First I just need to filter my data set so that I am only looking at third party candidates and then graph those in a very similar way to before.
new_presidential_data <- subset(presidential_data, select = c(state, year, party_simplified, Percent))
state_year_combinations <- expand.grid(state = unique(new_presidential_data$state),
year = unique(new_presidential_data$year))
# Create the row with "NONE" party_simplified and 0 percent
new_row <- data.frame(state = state_year_combinations$state,
year = state_year_combinations$year,
party_simplified = "NONE",
Percent = 0)
# Combine the original data with the new row
new_data <- rbind(new_presidential_data, new_row)
new_data_sorted <- arrange(new_data, year, state)
third_part_data <- subset(new_data_sorted, !(party_simplified %in% c("DEMOCRAT", "REPUBLICAN")))
percent_by_state_by_year <- aggregate(Percent ~ state + year, data = third_part_data, sum)
graph_list <- list()
y <- unique(presidential_data$year)
for(i in 1:12){
yearly_data <- filter(percent_by_state_by_year, year == y[i])
states_and_percentages <- data.frame(matrix(NA,nrow = 51,ncol = 0))
states_and_percentages$region <- tolower(yearly_data$state)
states_and_percentages$percent <- yearly_data$Percent
map_data <- data.frame(map_data("state"))
map_data <- left_join(map_data,states_and_percentages,"region")
current_year <- y[i]
graph <- ggplot() +
geom_map(data = map_data, map = map_data,
aes(x = long, y = lat, map_id = region, fill = "purple",alpha = percent),
color = "white", size = 0.25) +
scale_fill_manual(values = c("purple" = "purple")) +
labs(title = current_year) +
theme(plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none")+
guides(fill = guide_legend(title = "Party"))
graph_list[[i]] <- graph
}
graphs_arranged <- grid.arrange(
title = "How Third Party Canidate did each Year by State",
grobs = graph_list,
nrow = 3,
ncol = 4
)
# Display the grid of graphs
graphs_arranged
## TableGrob (3 x 4) "arrange": 12 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (2-2,1-1) arrange gtable[layout]
## 6 6 (2-2,2-2) arrange gtable[layout]
## 7 7 (2-2,3-3) arrange gtable[layout]
## 8 8 (2-2,4-4) arrange gtable[layout]
## 9 9 (3-3,1-1) arrange gtable[layout]
## 10 10 (3-3,2-2) arrange gtable[layout]
## 11 11 (3-3,3-3) arrange gtable[layout]
## 12 12 (3-3,4-4) arrange gtable[layout]
unique(presidential_data$party_simplified)
## [1] "DEMOCRAT" "REPUBLICAN" "OTHER" "LIBERTARIAN"
Okay it was a lot more difficult than I thought but here it is. I had trouble with getting states that did not have any third party candidates to stay in my data set. I needed them in there so the map had something to work with. The darker each state is mean that more people in that state voted for someone who was not a democrat or a republican. Overall I am satisfied with my two projects so far. I definitely think these graphs show some interesting things in the data and how political behavior has changed over the last thirty years. Lastly I want to see who has received the most votes for president over the last thirty years. This will be an accumulation of all the votes that candidate has received ever. I will filter out anyone who got less than ten million votes total so we are not overwhelmed with candidates. Okay so this chart shows the total number of votes each of the major candidates for the last thirty years have received in total. One trend that it shows is how more recent candidates get more votes. I also just find these comparisons interesting. I mostly worked on this project for several hours in the afternoon on Friday, and several hours in the early afternoon on Saturday.
presidential_data$candidate <- gsub("BUSH, GEORGE H.W.", "BUSH SR", presidential_data$candidate)
presidential_data$candidate <- gsub("BUSH, GEORGE W.", "BUSH JR", presidential_data$candidate)
presidential_data$candidate <- gsub("CLINTON, BILL", "BILL CLINTON", presidential_data$candidate)
presidential_data$candidate <- gsub("CLINTON, HILLARY", "HILLARY CLINTON", presidential_data$candidate)
presidential_data$candidate <- sapply(strsplit(presidential_data$candidate, ", "), function(x) x[1])
real_canidates_totals <- aggregate(candidatevotes ~ candidate, data = presidential_data, sum)
real_canidates <- filter(real_canidates_totals, candidatevotes > 10000000)
ggplot(real_canidates, aes(x = reorder(candidate, -candidatevotes), y = candidatevotes))+
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Candidate Votes",
x = "Candidates",
y = "Votes") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"))+
scale_y_continuous(breaks = c(0, 50000000, 100000000, 150000000),
labels = c("0", "50,000,000", "100,000,000", "150,000,000"))