<!-- Dashboard : Analysis of Election Data, USA -->

---
title: "Dashboard : Analysis of Election Data, USA"
author: "Hari poorna kumar kalahasti"
output: 
  flexdashboard::flex_dashboard:
    theme: journal
    orientation: columns
    vertical_layout: fill
    social: menu
    source_code: embed
---

```{r setup, include=FALSE}
library(tidyverse)   # Comprehensive data manipulation and visualization tools
library(magrittr)    # Allows using the %>% operator for cleaner code
library(stringr)     # Provides string manipulation functions
library(dplyr)       # Provides data manipulation functions (part of tidyverse)
library(tidyr)       # Provides tools for data tidying (part of tidyverse)
library(lubridate)   # Helps with date and time manipulation (part of tidyverse)
library(flexdashboard)
library(readr)
library(skimr)
library(flextable)
library(viridis)     # Provides color palettes for visualizations
library(ggplot2)
library(forcats)
library(datadictionary)
library(tidyverse)
library(maps) 
library(gtrendsR)    # Provides an interface for accessing Google Trends data
library(plotly)      # Allows creating interactive plots
```

```{r, message=FALSE}
# Importing CSV file
# Both paths are relative, so the files are looked up in the working directory
# in effect when the document is rendered.
election_data <- read_csv("Presidential_Elections_Data_count_level.csv")
# NOTE(review): census_dataset is not referenced anywhere else in the visible
# source - confirm whether this import is still needed.
census_dataset <- read_csv("census.csv")
```

```{r, message=FALSE}
# Normalise the text columns of election_data in a single pass:
#   1. title-case state, candidate, party and county_name,
#   2. expose county_name under the shorter name `county`,
#   3. collapse the spelling variants of the two 2020 major-party candidates.
election_data <- election_data %>%
  mutate(
    state = str_to_title(state),
    candidate = str_to_title(candidate),
    party = str_to_title(party),
    county_name = str_to_title(county_name)
  ) %>%
  rename(county = county_name) %>%
  mutate(
    # Applied in order, each on the result of the previous replacement
    candidate = str_replace(candidate, "Donald Trump|Donald J Trump", "Donald Trump"),
    candidate = str_replace(candidate, "Joseph R Biden Jr", "Joe Biden")
  )

# Lookup table: number of electoral college seats per state (50 states + DC).
#
# State names must be spelled exactly like the join key in election_data,
# which is produced by str_to_title(). That function capitalises every word,
# so the district is written "District Of Columbia" here; with a lower-case
# "of" the join finds no match and the district's seats are lost as NA.
#
# NOTE(review): one allocation is applied to every election year in the data,
# although seat counts are re-apportioned after each census - confirm that a
# single table is acceptable for the historical years.
default_electoral_college_seats_state_wise <- data.frame(
  state = c("Alabama", "Kentucky", "North Dakota", "Alaska", "Louisiana", "Ohio", "Arizona", "Maine", "Oklahoma", "Arkansas",
            "Maryland", "Oregon", "California", "Massachusetts", "Pennsylvania", "Colorado", "Michigan", "Rhode Island", "Connecticut",
            "Minnesota", "South Carolina", "Delaware", "Mississippi", "South Dakota", "District Of Columbia", "Missouri", "Tennessee",
            "Florida", "Montana", "Texas", "Georgia", "Nebraska", "Utah", "Hawaii", "Nevada", "Vermont", "Idaho", "New Hampshire",
            "Virginia", "Illinois", "New Jersey", "Washington", "Indiana", "New Mexico", "West Virginia", "Iowa", "New York", "Wisconsin",
            "Kansas", "North Carolina", "Wyoming"),
  # Nevada holds 6 seats (the previous value of 4 left the total at 536)
  electoral_seats = c(9, 8, 3, 3, 8, 17, 11, 4, 7, 6, 10, 8, 54, 11, 19, 10, 15, 4, 7, 10, 9, 3, 6, 3, 3, 10, 11, 30, 4, 40, 16, 5, 6,
                      4, 6, 3, 4, 4, 13, 19, 14, 12, 11, 5, 4, 6, 28, 10, 6, 16, 3)
)

# The electoral college has 538 seats in total; fail fast if the table is edited
# into an inconsistent state.
stopifnot(sum(default_electoral_college_seats_state_wise$electoral_seats) == 538)

# Total votes per party in each state and year.
# The electoral-seat lookup is NOT merged in at this stage: summarise() keeps
# only the grouping columns and the new total, so a merged seat column would be
# discarded again. Seats are attached once, after the winners are known.
merged_election_data <- election_data %>%
  group_by(year, state, party) %>%
  summarise(total_votes_state_party = sum(candidatevotes), .groups = "drop")

# Winning party per state and year = the party with the highest vote total.
# A tie keeps every tied party.
state_winners <- merged_election_data %>%
  group_by(year, state) %>%
  filter(total_votes_state_party == max(total_votes_state_party)) %>%
  ungroup()

# Surface states that have no entry in the seat lookup; otherwise they would
# silently receive NA seats and vanish from the totals computed with na.rm = TRUE.
states_without_seats <- setdiff(
  unique(state_winners$state),
  default_electoral_college_seats_state_wise$state
)
if (length(states_without_seats) > 0) {
  warning(
    "No electoral seats found for: ",
    paste(states_without_seats, collapse = ", "),
    call. = FALSE
  )
}

# Attach the electoral seats of each state to its winning party
state_winners_with_votes <- state_winners %>%
  left_join(default_electoral_college_seats_state_wise, by = "state")
```


```{r, message=FALSE}

# Electoral seats won per party and election year: add up the seats of every
# state the party carried. Missing seat values are ignored by the sum.
electoral_college_seats <- summarise(
  group_by(state_winners_with_votes, year, party),
  total_electoral_seats = sum(electoral_seats, na.rm = TRUE)
)

# Plain data.frame version of the same result, used for display
electoral_college_seats_table <- as.data.frame(electoral_college_seats)


# Restrict the election data to the elections from 2000 to 2020.
# filter() drops rows whose year is NA, whereas base logical indexing would
# carry them along as all-NA rows.
year_data <- election_data %>%
  filter(year >= 2000, year <= 2020)

# Total votes for each candidate in each year
candidate_votes <- year_data %>%
  group_by(year, candidate) %>%
  summarize(total_votes = sum(candidatevotes), .groups = "drop")

# Total votes cast in each year (all candidates together)
total_votes_per_year <- year_data %>%
  group_by(year) %>%
  summarize(total_votes = sum(candidatevotes))

# Share of each year's votes per candidate, one column per year.
# Both inputs have a total_votes column, so merge() suffixes them:
# .x = candidate total, .y = year total. Candidates absent in a year get 0.
# NOTE(review): spread() is superseded by pivot_wider(); it is kept here so the
# row order of the resulting table does not change - verify before migrating.
candidate_votes_percentage <- merge(candidate_votes, total_votes_per_year, by = "year") %>%
  mutate(percentage_votes = (total_votes.x / total_votes.y) * 100) %>%
  select(-total_votes.x, -total_votes.y) %>%
  spread(year, percentage_votes, fill = 0)

# Render every year column as text with two decimals and a "%" sign.
# sprintf() formats each value on its own, so no cell is padded with leading
# blanks; list-style indexing ([-1]) stays a data frame even with one year column.
candidate_votes_percentage[-1] <- lapply(
  candidate_votes_percentage[-1],
  function(x) sprintf("%.2f%%", as.numeric(x))
)

candidate_votes_percentage_table <- candidate_votes_percentage %>%
  as.data.frame()


# Share of votes per party and election year, computed as the party's summed
# candidatevotes divided by the summed totalvotes of the same rows.
# NOTE(review): this assumes totalvotes is not repeated across several rows of
# the same party within a county - confirm against the data layout.
# The party factor is then ordered with fct_reorder(), whose default summary
# function is the median of percentage_votes, in descending order.
votes_of_party_in_percentage <- election_data %>%
  group_by(year, party) %>%
  summarize(percentage_votes = sum(candidatevotes) / sum(totalvotes) * 100) %>%
  ungroup() %>%
  mutate(party = fct_reorder(party, percentage_votes, .desc = TRUE))

# Bar chart with one bar per party, one facet per election year
percentage_votes_each_party_recieved <- ggplot(
  votes_of_party_in_percentage,
  aes(x = party, y = percentage_votes, fill = party)
) +
  geom_col(position = position_dodge(width = 0.7)) +
  facet_wrap(~ year, ncol = 3, scales = "free_x") +
  scale_fill_viridis(discrete = TRUE, option = "E") +
  scale_y_continuous(breaks = seq(0, 100, by = 10)) +
  labs(
    title = "Percentage of Votes Received by Each Party Each Year",
    x = "Party",
    y = "Percentage of Votes",
    caption = "Data source: https://electionlab.mit.edu/data"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )



# Line chart of the total number of votes cast per election year

# Total votes per year.
# NOTE(review): this aggregates every year present in election_data, while the
# chart title says 2000-2020 - confirm the data holds no other years.
total_votes_per_year <- election_data %>%
  group_by(year) %>%
  summarize(total_votes = sum(candidatevotes))

# Build the line graph.
# theme_get() returns a complete theme, and adding a complete theme replaces
# every theme setting added before it. It therefore has to come BEFORE the
# theme() tweak, otherwise the centred title is thrown away.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so older installations keep working.
total_votes_per_year_line_graph <- ggplot(total_votes_per_year, aes(x = year, y = total_votes)) +
  geom_line(size = 1, color = "Red") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5),
                     labels = scales::comma) +
  labs(title = "Total Number of Polling in the United States from 2000-2020",
       x = "Year",
       y = "Number of Votes (in numbers)",
       caption = "Data source: https://electionlab.mit.edu/data") +
  theme_get() +
  theme(plot.title = element_text(hjust = 0.5))


# State outlines from the maps package. The `region` column is renamed to
# `state` and title-cased so it can be joined with the election results.
us_states <- map_data("state") %>%
  rename(state = region) %>%
  mutate(state = str_to_title(state))

# Winning party per state for the 2020 and 2016 elections
state_winners_2020 <- filter(state_winners_with_votes, year == 2020)
state_winners_2016 <- filter(state_winners_with_votes, year == 2016)

# Attach the 2020 winner to every outline point of the matching state
map_data_combined_2020 <- left_join(
  us_states,
  state_winners_2020,
  by = "state",
  copy = TRUE
)

# Map of the states filled by winning party, with thin white state borders
election_outcome_map_2020 <- ggplot(
  map_data_combined_2020,
  aes(x = long, y = lat, group = group, fill = party)
) +
  geom_polygon(color = "white", size = 0.25) +
  scale_fill_manual(
    values = c("Democrat" = "blue", "Republican" = "red", "Other" = "gray")
  ) +
  coord_quickmap() +
  labs(
    title = "2020 Election outcome by state",
    caption = "Data source:  https://electionlab.mit.edu/data"
  ) +
  theme_void()


# Load the Google Trends data for the two election search terms.
#
# The data is requested from Google Trends first. On success it is cached as an
# RDS file and returned. If the request fails, the reason is reported and the
# cached RDS file is used instead.
#
# cache_file: path of the RDS cache (defaults to the file name used so far).
# Returns the gtrends result with interest_over_time$date parsed by ymd(),
# or NULL when the request fails and no cache file exists.
load_gtrends_data <- function(cache_file = "importedDataGtrends.rds") {
  # Convert the date column of interest_over_time with lubridate
  parse_trend_dates <- function(trends) {
    trends$interest_over_time$date <- ymd(trends$interest_over_time$date)
    trends
  }

  # Attempt to fetch data from gtrends; report (rather than hide) a failure
  fetched <- tryCatch(
    parse_trend_dates(
      gtrends(c("US Election Results 2020", "Election Fraud"),
              time = "2020-08-02 2020-12-03", geo = "US")
    ),
    error = function(err) {
      message("Google Trends request failed: ", conditionMessage(err))
      NULL
    }
  )

  if (!is.null(fetched)) {
    # A failed cache write must not throw away the data that was just fetched
    tryCatch(
      write_rds(fetched, file = cache_file),
      error = function(err) {
        message("Could not write ", cache_file, ": ", conditionMessage(err))
      }
    )
    return(fetched)
  }

  # Request failed: fall back to the cached copy if there is one
  if (file.exists(cache_file)) {
    return(parse_trend_dates(read_rds(cache_file)))
  }

  message("Failed to fetch data from gtrends and no RDS file found.")
  NULL
}
# Loading google trends data using the load_gtrends_data function
gTrendsElectionSearchData <- load_gtrends_data()

# Build an interactive area chart of search interest over time.
#
# keyword_string: character vector of keywords; it fixes the order of the
#                 keyword factor levels used for colouring.
# data:           a gtrends result whose interest_over_time element has the
#                 columns hits, date and keyword.
# Returns a plotly object.
plot_trend <- function(keyword_string, data) {
  time_trend <- data$interest_over_time %>%
    mutate(
      # "<1" is mapped to 0.5. The replacement happens before the numeric
      # conversion so that as.numeric() never sees "<1" and raises no
      # coercion warning.
      hits = as.numeric(replace(as.character(hits), hits %in% "<1", "0.5")),
      date = as.Date(date),
      keyword = factor(keyword, levels = keyword_string)
    )

  plot_ly(data = time_trend, x = ~date, y = ~hits, color = ~keyword,
          type = "scatter", mode = "lines", fill = "tozeroy") %>%
    layout(title = "Exploring the Search Interest for 'Election Fraud' and 'US Election Results 2020' during the 2020 US Election Season",
           xaxis = list(title = "Months in year 2020", standoff = 20),
           yaxis = list(title = "Hits (Relative to Peak from 0 - 100)", standoff = 20),
           legend = list(orientation = "v"),
           showlegend = TRUE,
           margin = list(l = 100, r = 100, b = 100, t = 100),  # inorder to Adjust margins
           padding = list(r = 10, b = 50)  # inorder to Adjust padding
    )
}

# The guard has to test the downloaded object. A bare `data` resolves to the
# base R function data(), which is never NULL, so that test could not fail.
# When no data is available an empty placeholder chart is used, so the tab
# that prints election_fraud_trend_plot still renders.
election_fraud_trend_plot <- if (!is.null(gTrendsElectionSearchData)) {
  plot_trend(
    keyword_string = c("US Election Results 2020", "Election Fraud"),
    data = gTrendsElectionSearchData
  )
} else {
  plotly_empty(type = "scatter", mode = "markers") %>%
    layout(title = "Google Trends data is unavailable")
}
```

Column {data-width=750 .tabset style="margin-top: 0px;"}
-----------------------------------------------------------------------

### Search Interest during 2020 US Election Season.

```{r}
# Render the interactive chart assigned to election_fraud_trend_plot in the
# data-preparation chunk above.
election_fraud_trend_plot
```

### Percentage of Votes by Party (2000-2020)

```{r}
# Bar chart of each party's vote share per election year
percentage_votes_each_party_recieved
```

###  Metrics of Number of Votes polled (2000-2020)

```{r}
# Line chart of the total number of votes per election year
total_votes_per_year_line_graph
```

### Election outcome 2020

```{r}
# Render the state map filled by the winning party of the 2020 election
election_outcome_map_2020
```

Column {data-width=550 style="margin-top: 0px;"}
-----------------------------------------------------------------------

### Summary statistics of Percentage of votes each candidate received for each term year

```{r}
# Print the candidate-by-year table of vote percentages (values are text
# ending in "%")
candidate_votes_percentage_table
```

### Electoral college seats won by each party in each election year

```{r}
# Print the total electoral seats per year and party
electoral_college_seats_table
```