Overview

This analysis examines whether congressional redistricting in Mecklenburg County, NC affected voter turnout between the 2020 and 2022 general elections.

Research Question:

Is redistricting correlated with a change in voter turnout?

Hypothesis:

Redistricting is negatively correlated with voter turnout: voters whose congressional districts changed as a result of redistricting experienced a larger decrease in turnout than voters whose districts did not change.

library(tidyverse)

Data Loading

# Read the 3 November 2020 registration snapshot. In the col_types mask every
# "_" skips a column and every "c" reads one as character, so four columns
# are kept.
Snapshot2020 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/VR_Snapshot_20201103_reencoded 2.csv",
  col_types = "__cc_c______________________________________________c______________________________________"
)

# Keep Mecklenburg County rows, drop the registration status code, and tag the
# congressional district column with its snapshot year.
# NOTE(review): unlike the 2022 pipeline, no status_cd filter is applied
# here before the column is dropped; confirm that is intentional.
MECKLENBURG_data_2020 <- Snapshot2020 |>
  filter(county_desc == "MECKLENBURG") |>
  select(!status_cd) |>
  rename(cong_dist_2020 = cong_dist_abbrv)

# Cache the county extract in the working directory.
MECKLENBURG_data_2020 |>
  write_csv(file = "MECKLENBURG_data_2020.csv")

# Read the 8 November 2022 registration snapshot with the same column mask
# ("_" skips a column, "c" reads it as character).
Snapshot2022 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/VR_Snapshot_20221108_reencoded 2.csv",
  col_types = "__cc_c______________________________________________c______________________________________"
)

# Keep Mecklenburg County rows whose status code is "A" or "I", then drop the
# status column and tag the district column with its snapshot year.
MECKLENBURG_data_2022 <- Snapshot2022 |>
  filter(county_desc == "MECKLENBURG", status_cd %in% c("A", "I")) |>
  select(!status_cd) |>
  rename(cong_dist_2022 = cong_dist_abbrv)

# NOTE(review): this assignment replaces the object built just above with the
# contents of a CSV that this script never writes, so the pipeline above has
# no effect on later steps. Confirm which source is intended.
MECKLENBURG_data_2022 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/MECKLENBURG_data_2022_3.csv",
  col_types = "ccc"
)

# Keep only registrants present in both the 2020 and 2022 extracts, matched on
# county and voter registration number.
# NOTE(review): this object is named JoinedNCFile (capital F) while later code
# uses JoinedNCfile, which is loaded from a CSV; confirm the file written
# here is the one that is read back.
JoinedNCFile <- MECKLENBURG_data_2020 |>
  inner_join(
    MECKLENBURG_data_2022,
    by = join_by(county_desc, voter_reg_num)
  )

# Cache the joined table in the working directory.
JoinedNCFile |>
  write_csv(file = "JoinedNCfile.csv")

# Read three character columns from the tab-separated voter-history file
# (the col_types mask skips every "_" column) and keep only the rows for the
# two general elections of interest.
MecklenburgVoterHistory <- read_tsv(
  file = "~/Downloads/ncvhis60.txt",
  col_types = "_ccc___________"
) |>
  filter(election_lbl %in% c("11/03/2020", "11/08/2022"))

# Cache the filtered history in the working directory.
MecklenburgVoterHistory |>
  write_csv(file = "MecklenburgVoterHistory.csv")

# Reload the cached intermediate tables.
# voter_reg_num is the key used to join registration records to voting
# history, so it is read explicitly as character in both files rather than
# left to type guessing; this keeps the zero-padded IDs intact and guarantees
# both sides of the join have the same type. election_lbl is likewise pinned
# to character because it is later compared against date strings. All other
# columns are still guessed.
JoinedNCfile <- read_csv(
  "~/Library/CloudStorage/OneDrive-UniversityofFlorida/JoinedNCfile.csv",
  col_types = cols(voter_reg_num = col_character())
)
MecklenburgVoterHistory <- read_csv(
  "~/Library/CloudStorage/OneDrive-UniversityofFlorida/MecklenburgVoterHistory.csv",
  col_types = cols(
    voter_reg_num = col_character(),
    election_lbl  = col_character()
  )
)

Identifying Redistricted Voters

# One row per matched registrant: both district numbers as integers, a logical
# flag for whether the district differs between snapshots, and a readable
# "old → new" label. A missing district on either side leaves both the flag
# and the label NA.
district_changes <- JoinedNCfile |>
  select(voter_reg_num, cong_dist_2020, cong_dist_2022) |>
  mutate(across(c(cong_dist_2020, cong_dist_2022), as.integer)) |>
  mutate(
    redistricted = cong_dist_2020 != cong_dist_2022,
    district_change = if_else(
      !redistricted,
      "no change",
      paste0(cong_dist_2020, " → ", cong_dist_2022)
    )
  )

# Tally registrants by redistricting flag (FALSE / TRUE / NA).
count(district_changes, redistricted)
## # A tibble: 3 × 2
##   redistricted      n
##   <lgl>         <int>
## 1 FALSE        345488
## 2 TRUE         357641
## 3 NA              274
# Among redistricted registrants, tally each old → new pairing, largest first.
district_changes |>
  filter(redistricted) |>
  count(district_change, sort = TRUE)
## # A tibble: 3 × 2
##   district_change      n
##   <chr>            <int>
## 1 12 → 14         212857
## 2 9 → 14          142986
## 3 9 → 12            1798

Building Turnout Flags

# Reshape the voting history into one row per voter with 0/1 indicator columns
# voted_2020 and voted_2022: each voter/election cell becomes 1L when at least
# one history row exists and 0L otherwise.
turnout <- MecklenburgVoterHistory |>
  filter(election_lbl == "11/03/2020" | election_lbl == "11/08/2022") |>
  mutate(
    year = if_else(election_lbl == "11/03/2020", "2020", "2022")
  ) |>
  pivot_wider(
    id_cols = voter_reg_num,
    names_from = year,
    values_from = election_lbl,
    names_prefix = "voted_",
    values_fn = \(x) 1L,
    values_fill = 0L
  )

turnout |> glimpse()
## Rows: 585,372
## Columns: 3
## $ voter_reg_num <chr> "001000314697", "000999856264", "000999870499", "0010009…
## $ voted_2020    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ voted_2022    <int> 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,…

Master Analysis Table

# Master table: every registrant with a known redistricting flag (recoded to
# 0/1), joined to the turnout indicators. Registrants with no matching history
# row are treated as not having voted (0) in that election.
analysis_df <- district_changes |>
  drop_na(redistricted) |>
  mutate(redistricted = as.integer(redistricted)) |>
  left_join(turnout, by = join_by(voter_reg_num)) |>
  mutate(
    across(c(voted_2020, voted_2022), \(flag) coalesce(flag, 0L))
  )

analysis_df |> glimpse()
## Rows: 703,129
## Columns: 7
## $ voter_reg_num   <chr> "000000014843", "000000012829", "000000013499", "00000…
## $ cong_dist_2020  <int> 12, 12, 12, 9, 9, 9, 9, 9, 12, 12, 9, 12, 12, 12, 12, …
## $ cong_dist_2022  <int> 14, 12, 12, 14, 14, 14, 14, 14, 12, 12, 14, 14, 14, 12…
## $ redistricted    <int> 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, …
## $ district_change <chr> "12 → 14", "no change", "no change", "9 → 14", "9 → 14…
## $ voted_2020      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, …
## $ voted_2022      <int> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, …
# Headline counts for the analysis table.
# nrow(analysis_df) counts every registrant kept in the table, including those
# whose voted_2020 and voted_2022 flags are both 0, so the first line is
# labelled as a count of registrants rather than of people who voted.
cat("Registered voters in analysis table:", nrow(analysis_df), "\n")
## Registered voters in analysis table: 703129
cat("Voted in 2020:", sum(analysis_df$voted_2020), "\n")
## Voted in 2020: 495186
cat("Voted in 2022:", sum(analysis_df$voted_2022), "\n")
## Voted in 2022: 306081

Descriptive Turnout Rates

# Turnout rate in each election, and the 2022-minus-2020 change, for
# non-redistricted (0) and redistricted (1) registrants.
turnout_summary <- analysis_df |>
  group_by(redistricted) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022),
    turnout_change = pct_voted_2022 - pct_voted_2020
  )

print(turnout_summary)
## # A tibble: 2 × 5
##   redistricted      n pct_voted_2020 pct_voted_2022 turnout_change
##          <int>  <int>          <dbl>          <dbl>          <dbl>
## 1            0 345488          0.685          0.404         -0.281
## 2            1 357641          0.723          0.466         -0.257

Chi-Square Test

# Pearson chi-squared test of independence between redistricting status and
# 2022 turnout on the 2x2 contingency table. This compares 2022 turnout only;
# it does not account for each group's 2020 turnout.
chisq.test(table(analysis_df$redistricted, analysis_df$voted_2022))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(analysis_df$redistricted, analysis_df$voted_2022)
## X-squared = 2747.8, df = 1, p-value < 2.2e-16

Logistic Regression

# Logistic regression of 2022 turnout on redistricting status, adjusting for
# whether the registrant voted in 2020.
model <- glm(
  voted_2022 ~ redistricted + voted_2020,
  data   = analysis_df,
  family = binomial(link = "logit")
)

# Coefficients are on the log-odds scale.
# NOTE(review): the printed redistricted estimate is positive, i.e. opposite in
# sign to the stated hypothesis.
summary(model)
## 
## Call:
## glm(formula = voted_2022 ~ redistricted + voted_2020, family = binomial(link = "logit"), 
##     data = analysis_df)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.921230   0.015337 -255.67   <2e-16 ***
## redistricted  0.229281   0.005728   40.03   <2e-16 ***
## voted_2020    4.245963   0.015313  277.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 962942  on 703128  degrees of freedom
## Residual deviance: 704748  on 703126  degrees of freedom
## AIC: 704754
## 
## Number of Fisher Scoring iterations: 6
# Exponentiate the coefficients and their Wald-type confidence limits
# (confint.default) to express them as odds ratios.
exp(cbind(OR = coef(model), confint.default(model)))
##                       OR       2.5 %      97.5 %
## (Intercept)   0.01981671  0.01922988  0.02042145
## redistricted  1.25769546  1.24365455  1.27189489
## voted_2020   69.82295618 67.75852869 71.95028145

Breakdown by District Change

# Turnout rates for each specific old → new district pairing, restricted to
# redistricted registrants and ordered from the largest group to the smallest.
redistricted_only <- filter(analysis_df, redistricted == 1L)

change_summary <- redistricted_only |>
  group_by(district_change) |>
  summarise(
    n = n(),
    pct_voted_2022 = mean(voted_2022),
    pct_voted_2020 = mean(voted_2020)
  ) |>
  arrange(desc(n))

print(change_summary)
## # A tibble: 3 × 4
##   district_change      n pct_voted_2022 pct_voted_2020
##   <chr>            <int>          <dbl>          <dbl>
## 1 12 → 14         212857          0.419          0.689
## 2 9 → 14          142986          0.534          0.771
## 3 9 → 12            1798          0.541          0.800

Graph 1: Turnout by Redistricting Status

# Long-format version of turnout_summary for plotting: one row per
# (redistricting group, election year) with a readable group label. Any group
# that is neither 0 nor 1 is labelled "Unknown" and then dropped.
turnout_long <- turnout_summary |>
  select(redistricted, pct_voted_2020, pct_voted_2022) |>
  pivot_longer(
    cols = c(pct_voted_2020, pct_voted_2022),
    names_to = "year",
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  ) |>
  mutate(
    voter_group = case_when(
      redistricted == 0 ~ "Not redistricted",
      redistricted == 1 ~ "Redistricted",
      .default = "Unknown"
    )
  ) |>
  filter(voter_group != "Unknown")

# Graph 1: side-by-side bars of turnout rate per election year, split by
# redistricting status, with the percentage printed above each bar.
ggplot(turnout_long, aes(x = year, y = turnout_rate, fill = voter_group)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_text(
    aes(label = scales::percent(turnout_rate, accuracy = 0.1)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.6),
    vjust    = -0.5,
    size     = 3.5
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  scale_fill_manual(values = c("Not redistricted" = "#3B8BD4", "Redistricted" = "#E8593C")) +
  labs(
    title    = "Voter Turnout in 2020 vs 2022 by Redistricting Status",
    # The rates use every registrant in analysis_df as the denominator,
    # including those who voted in neither election, so the subtitle describes
    # the population as registrants matched across both snapshots.
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x        = "Election Year",
    y        = "Turnout Rate",
    fill     = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold"),
    legend.position = "top"
  )

Graph 2: Turnout Change by Specific District Reassignment

# Turnout rates per district pairing, plus a baseline group of registrants who
# were in District 12 in both years. Registrants matching neither rule are
# excluded. Groups are ordered from largest to smallest.
change_summary_with_stayers <- analysis_df |>
  mutate(
    group = case_when(
      cong_dist_2020 == 12 & cong_dist_2022 == 12 ~ "Stayed in District 12",
      redistricted == 1 ~ district_change,
      .default = NA_character_
    )
  ) |>
  drop_na(group) |>
  group_by(group) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  ) |>
  arrange(desc(n))

# Long format for plotting: one row per (group, election year). Stripping the
# "pct_voted_" prefix from the column names leaves "2020" / "2022" as the year.
change_long <- change_summary_with_stayers |>
  pivot_longer(
    cols = c(pct_voted_2020, pct_voted_2022),
    names_to = "year",
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )

# Label positions for Graph 2. Within each year the groups are ranked by
# turnout rate and given evenly spaced label heights (0.06 apart) so the
# percentage labels do not overlap when the rates are close together.
# The heights are generated from the number of groups present instead of a
# fixed four-element vector, so this no longer errors when the data contain
# more or fewer than four groups; with four groups the positions are
# 1.00/0.94/0.88/0.82 for 2020 and 0.72/0.66/0.60/0.54 for 2022.
labels_2020 <- change_long |>
  filter(year == "2020") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 1.00, by = -0.06, length.out = n()))

labels_2022 <- change_long |>
  filter(year == "2022") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 0.72, by = -0.06, length.out = n()))

# Graph 2: one line per group connecting its 2020 and 2022 turnout rates.
# Percentage labels sit to the left (2020) and right (2022) of the points at
# the pre-computed label_y heights, joined to their points by dotted leaders.
ggplot(change_long, aes(x = year, y = turnout_rate,
                        group = group, color = group)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  # Leader lines from each 2020 label to its point.
  geom_segment(
    data = labels_2020,
    aes(x = 0.65, xend = 0.78, y = label_y, yend = turnout_rate),
    linewidth = 0.3, linetype = "dotted"
  ) +
  # Leader lines from each 2022 point to its label.
  geom_segment(
    data = labels_2022,
    aes(x = 2.22, xend = 2.35, y = turnout_rate, yend = label_y),
    linewidth = 0.3, linetype = "dotted"
  ) +
  geom_text(
    data  = labels_2020,
    aes(x = 0.62, y = label_y,
        label = scales::percent(turnout_rate, accuracy = 0.1),
        color = group),
    hjust = 1, size = 3.2
  ) +
  geom_text(
    data  = labels_2022,
    aes(x = 2.38, y = label_y,
        label = scales::percent(turnout_rate, accuracy = 0.1),
        color = group),
    hjust = 0, size = 3.2
  ) +
  # Extra horizontal padding leaves room for the labels outside the two years.
  scale_x_discrete(limits = c("2020", "2022"),
                   expand  = expansion(mult = 0.5)) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1.05)) +
  scale_color_manual(
    values = c(
      "12 → 14"               = "#E8593C",
      "9 → 14"                = "#3B8BD4",
      "9 → 12"                = "#1D9E75",
      "Stayed in District 12" = "#8B5CF6"
    ),
    labels = c(
      "12 → 14"               = "12 → 14 (redistricted)",
      "9 → 14"                = "9 → 14 (redistricted)",
      "9 → 12"                = "9 → 12 (redistricted)",
      "Stayed in District 12" = "Stayed in District 12 (baseline)"
    )
  ) +
  labs(
    title    = "Turnout Change by Specific District Reassignment",
    # The rates use every registrant in analysis_df as the denominator,
    # including those who voted in neither election, so the subtitle describes
    # the population as registrants matched across both snapshots.
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x        = "Election Year",
    y        = "Turnout Rate",
    color    = "Group"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold"),
    legend.position = "top",
    legend.text     = element_text(size = 10)
  )

Graph 3: Overall Voter Turnout

# County-wide turnout rate in each election across the whole analysis table,
# in long format (one row per year) for plotting.
overall_turnout <- analysis_df |>
  summarise(
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  ) |>
  pivot_longer(
    cols = everything(),
    names_to = "year",
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )

# Graph 3: overall turnout rate per election year, with the percentage printed
# above each bar.
ggplot(overall_turnout, aes(x = year, y = turnout_rate)) +
  geom_col(width = 0.5, fill = "#3B8BD4") +
  geom_text(
    aes(label = scales::percent(turnout_rate, accuracy = 0.1)),
    vjust = -0.5,
    size  = 4.5
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  labs(
    title    = "Overall Voter Turnout in Mecklenburg County",
    # The rates use every registrant in analysis_df as the denominator,
    # including those who voted in neither election, so the subtitle describes
    # the population as registrants matched across both snapshots.
    subtitle = "Among voters registered in both the 2020 and 2022 snapshots",
    x        = "Election Year",
    y        = "Turnout Rate"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

Graph 4: Difference-in-Differences

# Two-group comparison for the difference-in-differences: all redistricted
# registrants versus registrants who were in District 12 in both years.
# Registrants matching neither rule are excluded.
did_summary <- analysis_df |>
  mutate(
    group = case_when(
      cong_dist_2020 == 12 & cong_dist_2022 == 12 ~ "Stayed in District 12",
      redistricted == 1 ~ "Redistricted (all)",
      .default = NA_character_
    )
  ) |>
  drop_na(group) |>
  group_by(group) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  )

# Difference-in-differences estimate: the 2020 → 2022 change in turnout for
# the redistricted group minus the same change for the District 12 stayers.
# The table is spread to a single row so each group/year rate is one column.
did_value <- did_summary |>
  select(!n) |>
  pivot_wider(
    names_from = group,
    values_from = c(pct_voted_2020, pct_voted_2022)
  ) |>
  mutate(
    change_redistricted =
      `pct_voted_2022_Redistricted (all)` - `pct_voted_2020_Redistricted (all)`,
    change_stayed =
      `pct_voted_2022_Stayed in District 12` - `pct_voted_2020_Stayed in District 12`,
    diff_in_diff = change_redistricted - change_stayed
  )

# Long format for plotting: one row per (group, election year). Stripping the
# "pct_voted_" prefix from the column names leaves "2020" / "2022" as the year.
did_long <- did_summary |>
  pivot_longer(
    cols = c(pct_voted_2020, pct_voted_2022),
    names_to = "year",
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )

# Label positions for Graph 4: within each year the two groups are ranked by
# turnout rate, and the higher one gets the higher label height so the two
# percentage labels do not overlap.
did_2020_rows <- filter(did_long, year == "2020")
did_labels_2020 <- did_2020_rows |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = c(0.80, 0.74))

did_2022_rows <- filter(did_long, year == "2022")
did_labels_2022 <- did_2022_rows |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = c(0.58, 0.52))

# Graph 4: difference-in-differences plot. One line per group connects its
# 2020 and 2022 turnout rates; percentage labels sit beside the points at the
# pre-computed label_y heights, and the DiD estimate is annotated at the top.
ggplot(did_long, aes(x = year, y = turnout_rate,
                     group = group, color = group)) +
  geom_line(linewidth = 1.4) +
  geom_point(size = 4) +
  # Leader lines from each 2020 label to its point.
  geom_segment(
    data = did_labels_2020,
    aes(x = 0.90, xend = 0.97, y = label_y, yend = turnout_rate),
    linewidth = 0.3, linetype = "dotted"
  ) +
  # Leader lines from each 2022 point to its label.
  geom_segment(
    data = did_labels_2022,
    aes(x = 2.03, xend = 2.10, y = turnout_rate, yend = label_y),
    linewidth = 0.3, linetype = "dotted"
  ) +
  geom_text(
    data  = did_labels_2020,
    aes(x = 0.88, y = label_y,
        label = scales::percent(turnout_rate, accuracy = 0.1),
        color = group),
    hjust = 1, size = 4
  ) +
  geom_text(
    data  = did_labels_2022,
    aes(x = 2.12, y = label_y,
        label = scales::percent(turnout_rate, accuracy = 0.1),
        color = group),
    hjust = 0, size = 4
  ) +
  # DiD estimate, centred between the two election years.
  annotate(
    "text",
    x        = 1.5,
    y        = 0.95,
    label    = paste0("DiD = ",
                      scales::percent(did_value$diff_in_diff, accuracy = 0.1)),
    size     = 4.5,
    hjust    = 0.5,
    color    = "black",
    fontface = "italic"
  ) +
  scale_x_discrete(limits = c("2020", "2022"),
                   expand  = expansion(mult = 0.4)) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  scale_color_manual(
    values = c(
      "Redistricted (all)"    = "#E8593C",
      "Stayed in District 12" = "#8B5CF6"
    )
  ) +
  labs(
    title    = "Difference-in-Differences: Redistricted vs. Stayed in District 12",
    # The rates use every registrant in analysis_df as the denominator,
    # including those who voted in neither election, so the subtitle describes
    # the population as registrants matched across both snapshots.
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x        = "Election Year",
    y        = "Turnout Rate",
    color    = "Group"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold"),
    legend.position = "top",
    legend.text     = element_text(size = 11)
  )