Political Dissimilarity

Author

coda

Calculating the Political Dissimilarity Index

library(sf)
Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.0.10     ✔ readr     2.1.4 
✔ forcats   1.0.0      ✔ stringr   1.5.0 
✔ ggplot2   3.4.1      ✔ tibble    3.1.8 
✔ lubridate 1.9.2      ✔ tidyr     1.2.1 
✔ purrr     1.0.1      
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(spdep)
Loading required package: sp
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(tigris)
To enable caching of data, set `options(tigris_use_cache = TRUE)`
in your R script or .Rprofile.
library(readr)
# Read in the county-level 2020 presidential election data
# The vote and population counts in this file carry thousands separators, so
# readr imports them as character columns; they are converted further down.
votesdata <- readr::read_csv(file = "county level presidential data 2020.csv")
Rows: 257 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Office, State, RaceDate, CensusPop, Area, RedistrictedDate, TotalV...
dbl  (1): FIPPS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The source file misspells the county code column as "FIPPS"; swap in the
# conventional "FIPS" spelling and leave every other column name untouched.
names(votesdata) <- replace(
  names(votesdata),
  names(votesdata) == "FIPPS",
  "FIPS"
)
# Print the column names to confirm the rename took effect.
names(votesdata)
 [1] "Office"           "State"            "RaceDate"         "CensusPop"       
 [5] "FIPS"             "Area"             "RedistrictedDate" "TotalVotes"      
 [9] "RepVotes"         "RepCandidate"     "RepStatus"        "DemVotes"        
[13] "DemCandidate"     "DemStatus"       
# Show each column's storage type and first few values.
utils::str(object = votesdata)
spc_tbl_ [257 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Office          : chr [1:257] "President" "President" "President" "President" ...
 $ State           : chr [1:257] "Texas" "Texas" "Texas" "Texas" ...
 $ RaceDate        : chr [1:257] "20201103" "20201103" "20201103" "20201103" ...
 $ CensusPop       : chr [1:257] "58,458" "14,786" "86,771" "23,158" ...
 $ FIPS            : num [1:257] 48001 48003 48005 48007 48009 ...
 $ Area            : chr [1:257] "ANDERSON" "ANDREWS" "ANGELINA" "ARANSAS" ...
 $ RedistrictedDate: chr [1:257] "N/A" "N/A" "N/A" "N/A" ...
 $ TotalVotes      : chr [1:257] "19,227" "5,863" "34,574" "12,290" ...
 $ RepVotes        : chr [1:257] "15,110" "4,943" "25,076" "9,239" ...
 $ RepCandidate    : chr [1:257] "Trump, Donald J." "Trump, Donald J." "Trump, Donald J." "Trump, Donald J." ...
 $ RepStatus       : chr [1:257] "Incumbent" "Incumbent" "Incumbent" "Incumbent" ...
 $ DemVotes        : chr [1:257] "3,955" "850" "9,143" "2,916" ...
 $ DemCandidate    : chr [1:257] "Biden, Joseph R. Jr." "Biden, Joseph R. Jr." "Biden, Joseph R. Jr." "Biden, Joseph R. Jr." ...
 $ DemStatus       : chr [1:257] "Challenger" "Challenger" "Challenger" "Challenger" ...
 - attr(*, "spec")=
  .. cols(
  ..   Office = col_character(),
  ..   State = col_character(),
  ..   RaceDate = col_character(),
  ..   CensusPop = col_character(),
  ..   FIPPS = col_double(),
  ..   Area = col_character(),
  ..   RedistrictedDate = col_character(),
  ..   TotalVotes = col_character(),
  ..   RepVotes = col_character(),
  ..   RepCandidate = col_character(),
  ..   RepStatus = col_character(),
  ..   DemVotes = col_character(),
  ..   DemCandidate = col_character(),
  ..   DemStatus = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# Per-column summary; for numeric columns this also reports the NA count.
summary(object = votesdata)
    Office             State             RaceDate          CensusPop        
 Length:257         Length:257         Length:257         Length:257        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
      FIPS           Area           RedistrictedDate    TotalVotes       
 Min.   :48001   Length:257         Length:257         Length:257        
 1st Qu.:48128   Class :character   Class :character   Class :character  
 Median :48254   Mode  :character   Mode  :character   Mode  :character  
 Mean   :48254                                                           
 3rd Qu.:48381                                                           
 Max.   :48507                                                           
 NA's   :3                                                               
   RepVotes         RepCandidate        RepStatus           DemVotes        
 Length:257         Length:257         Length:257         Length:257        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
 DemCandidate        DemStatus        
 Length:257         Length:257        
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# na.omit(votesdata)


library(dplyr)

# Keep only the rows that carry a county FIPS code. summary(votesdata) reports
# three rows with a missing FIPS; these are taken to be the trailing non-county
# rows that used to be dropped by position. Filtering on the key itself keeps
# working if the number of footer rows in the export ever changes.
votesdata <- votesdata %>% dplyr::filter(!is.na(FIPS))

# The count columns are stored as text with thousands separators ("19,227"),
# so strip the commas once and reuse the numeric vectors below.
parse_count <- function(x) as.numeric(gsub(",", "", x, fixed = TRUE))
rep_votes <- parse_count(votesdata$RepVotes)
dem_votes <- parse_count(votesdata$DemVotes)

# Total number of votes cast in each county
votesdata$county_total_votes <- parse_count(votesdata$TotalVotes)

# Republican and Democratic share of all votes cast in each county
votesdata$prop_rep <- rep_votes / votesdata$county_total_votes
votesdata$prop_dem <- dem_votes / votesdata$county_total_votes

# Statewide party totals
total_rep_votes <- sum(rep_votes)
total_dem_votes <- sum(dem_votes)

# Statewide Republican share. It is measured against all votes cast (not just
# the two-party vote) so that it sits on the same base as prop_rep; comparing
# shares with different denominators would bias every county's deviation.
prop_total_rep <- total_rep_votes / sum(votesdata$county_total_votes)

# Per-county deviation of the Republican share from the statewide share
votesdata$dissimilarity_index <- abs(votesdata$prop_rep - prop_total_rep)

# Overall index of dissimilarity between Republican and Democratic voters:
#   D = 1/2 * sum_i | r_i / R - d_i / D |
# where r_i, d_i are county counts and R, D the statewide totals. The result
# lies in [0, 1]: 0 means both parties are spread identically across counties,
# 1 means they never share a county.
overall_dissimilarity_index <-
  0.5 * sum(abs(rep_votes / total_rep_votes - dem_votes / total_dem_votes))

Mapping the Dissimilarity Index: SEE HERE

Attempt #1

# NOTE(review): this attempt is kept commented out because it does not run as
# written; the notes below record what would have to change.
# library(sf)
# library(ggplot2)
# 
# # First I loaded the shapefile for Texas
# tx_shp <- st_read("new.shp")
# 
# # Then, I tried to merge shapefile with the election data, but I failed :( 
# # NOTE(review): the shapefile was read into `tx_shp`, but the call below
# # refers to `tx_shape`, which is never created. `merge()` also expects its
# # first table as `x`, not `sf` -- confirm against the sf merge method.
# # Whether the shapefile has an "Area" column to join on cannot be told
# # from here; verify before reusing.
# m_tx_shape <- merge(sf = tx_shape, y = votesdata, by = "Area")
# 
# # Generate choropleth map --- but I can't 
# # NOTE(review): `tx_map` is not defined anywhere above (the merged object
# # is `m_tx_shape`), and `variable_to_plot` looks like a placeholder for the
# # column to map, presumably `dissimilarity_index` -- confirm.
# ggplot() +
#   geom_sf(data = tx_map, aes(fill = variable_to_plot)) +
#   scale_fill_gradient(low = "white", high = "red") +
#   theme_void()

Attempt #2

A second attempt: this time I tried downloading the county shapefile directly with the tigris and sf packages.

# NOTE(review): kept commented out; the join below is unverified.
# library(sf)
# library(dplyr)
# 
# # Download Texas county boundaries (generalized cartographic boundary file).
# tx_counties <- counties(state = 'TX', cb = TRUE, resolution = '20m')
# 
# # NOTE(review): the join key on the spatial side is given as "name";
# # tigris county layers normally expose it as `NAME` in title case, while
# # `Area` in votesdata is upper case (e.g. "ANDERSON") -- confirm. Joining
# # on the county code (`GEOID` vs `FIPS`, after matching types) is presumably
# # more reliable than joining on names.
# mergedvote <- geo_join(tx_counties, votesdata, "name", "Area")