This analysis examines whether congressional redistricting in Mecklenburg County, NC affected voter turnout between the 2020 and 2022 general elections.
Is redistricting correlated with a change in voter turnout?
Hypothesis: redistricting is negatively correlated with voter turnout; that is, voters whose districts changed as a result of redistricting are expected to show a decrease in turnout.
library(tidyverse)
# Load the 2020-11-03 voter registration snapshot. The col_types spec reads
# four columns as character ("c") and skips every other column ("_").
Snapshot2020 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/VR_Snapshot_20201103_reencoded 2.csv",
  col_types = "__cc_c______________________________________________c______________________________________"
)

# Keep the Mecklenburg County rows, drop the registration status column, and
# tag the congressional district column with its snapshot year.
# NOTE(review): unlike the 2022 pipeline, no status_cd filter is applied here
# before the column is dropped — confirm this asymmetry is intentional.
MECKLENBURG_data_2020 <- Snapshot2020 |>
  filter(county_desc == "MECKLENBURG") |>
  select(!status_cd) |>
  rename(cong_dist_2020 = cong_dist_abbrv)

# Save the county subset to the current working directory.
write_csv(MECKLENBURG_data_2020, file = "MECKLENBURG_data_2020.csv")
# Load the 2022-11-08 voter registration snapshot with the same column spec
# used for the 2020 snapshot ("c" = character, "_" = skipped).
Snapshot2022 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/VR_Snapshot_20221108_reencoded 2.csv",
  col_types = "__cc_c______________________________________________c______________________________________"
)

# Keep Mecklenburg County rows whose status code is "A" or "I", then drop the
# status column and tag the district column with its snapshot year.
MECKLENBURG_data_2022 <- Snapshot2022 |>
  filter(
    county_desc == "MECKLENBURG",
    status_cd %in% c("A", "I")
  ) |>
  select(!status_cd) |>
  rename(cong_dist_2022 = cong_dist_abbrv)

# NOTE(review): this read replaces the table built just above with a CSV
# loaded from disk; presumably that file is a saved copy of the same
# subset — confirm, since the in-memory result is otherwise discarded.
MECKLENBURG_data_2022 <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/MECKLENBURG_data_2022_3.csv",
  col_types = "ccc"
)
# Match voters across the two snapshots: only rows whose county and voter
# registration number appear in both tables are kept.
JoinedNCFile <- MECKLENBURG_data_2020 |>
  inner_join(
    MECKLENBURG_data_2022,
    by = join_by(county_desc, voter_reg_num)
  )

# Save the matched voters to the current working directory.
write_csv(JoinedNCFile, file = "JoinedNCfile.csv")
# Read the tab-separated voter history file (three character columns kept,
# the rest skipped) and keep only the records labelled with the 2020 or 2022
# general election date.
MecklenburgVoterHistory <- read_tsv(
  file = "~/Downloads/ncvhis60.txt",
  col_types = "_ccc___________"
) |>
  filter(election_lbl %in% c("11/03/2020", "11/08/2022"))

# Save the filtered history to the current working directory.
write_csv(MecklenburgVoterHistory, file = "MecklenburgVoterHistory.csv")
# Reload the matched-voter table and the filtered voter history from disk,
# letting read_csv guess the column types.
# NOTE(review): the write_csv calls earlier in this script save to the working
# directory, while these reads use the OneDrive folder — presumably the files
# were copied there; confirm they are the same data.
JoinedNCfile <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/JoinedNCfile.csv"
)
MecklenburgVoterHistory <- read_csv(
  file = "~/Library/CloudStorage/OneDrive-UniversityofFlorida/MecklenburgVoterHistory.csv"
)
# One row per matched voter with both district assignments, a flag for
# whether the district changed, and a readable "old → new" label.
district_changes <- JoinedNCfile |>
  select(voter_reg_num, cong_dist_2020, cong_dist_2022) |>
  # Compare districts as integers rather than as read-in values.
  mutate(
    cong_dist_2020 = as.integer(cong_dist_2020),
    cong_dist_2022 = as.integer(cong_dist_2022)
  ) |>
  mutate(
    # NA when either district is missing, so such voters are neither TRUE
    # nor FALSE.
    redistricted = cong_dist_2020 != cong_dist_2022,
    # Rows with an NA flag match neither branch and get an NA label.
    district_change = case_when(
      redistricted ~ paste0(cong_dist_2020, " → ", cong_dist_2022),
      !redistricted ~ "no change"
    )
  )
# Tally voters by redistricting flag (TRUE, FALSE, or NA when undetermined).
count(district_changes, redistricted)
## # A tibble: 3 × 2
## redistricted n
## <lgl> <int>
## 1 FALSE 345488
## 2 TRUE 357641
## 3 NA 274
# Among redistricted voters, count each specific reassignment, largest first.
district_changes |>
  filter(redistricted) |>
  count(district_change, sort = TRUE)
## # A tibble: 3 × 2
## district_change n
## <chr> <int>
## 1 12 → 14 212857
## 2 9 → 14 142986
## 3 9 → 12 1798
# Reshape the voter history into one row per voter with 0/1 indicators
# voted_2020 and voted_2022.
turnout <- MecklenburgVoterHistory |>
  filter(election_lbl %in% c("11/03/2020", "11/08/2022")) |>
  # After the filter only two labels remain, so anything that is not the 2020
  # date is the 2022 date.
  mutate(year = if_else(election_lbl == "11/03/2020", "2020", "2022")) |>
  pivot_wider(
    id_cols = voter_reg_num,
    names_from = year,
    names_prefix = "voted_",
    values_from = election_lbl,
    # Any number of history records for a voter/year collapses to a single 1.
    values_fn = \(x) 1L,
    # A voter with no record for a year gets 0.
    values_fill = 0L
  )

glimpse(turnout)
## Rows: 585,372
## Columns: 3
## $ voter_reg_num <chr> "001000314697", "000999856264", "000999870499", "0010009…
## $ voted_2020 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ voted_2022 <int> 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,…
# Build the analysis table: every matched voter with a known redistricting
# status, plus their 2020 and 2022 turnout indicators.
analysis_df <- district_changes |>
  left_join(turnout, by = join_by(voter_reg_num)) |>
  # Drop voters whose redistricting status could not be determined.
  filter(!is.na(redistricted)) |>
  mutate(
    # Voters with no matching turnout row are counted as not having voted.
    voted_2020 = coalesce(voted_2020, 0L),
    voted_2022 = coalesce(voted_2022, 0L),
    # Recode the logical flag as 0/1.
    redistricted = as.integer(redistricted)
  )

glimpse(analysis_df)
## Rows: 703,129
## Columns: 7
## $ voter_reg_num <chr> "000000014843", "000000012829", "000000013499", "00000…
## $ cong_dist_2020 <int> 12, 12, 12, 9, 9, 9, 9, 9, 12, 12, 9, 12, 12, 12, 12, …
## $ cong_dist_2022 <int> 14, 12, 12, 14, 14, 14, 14, 14, 12, 12, 14, 14, 14, 12…
## $ redistricted <int> 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, …
## $ district_change <chr> "12 → 14", "no change", "no change", "9 → 14", "9 → 14…
## $ voted_2020 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, …
## $ voted_2022 <int> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, …
# Headline counts for the analysis sample. The sample is every matched voter
# with a known redistricting status, including voters who cast no ballot in
# either election (their turnout indicators are 0), so the first line is
# labelled as the registered-voter sample rather than as voters who were
# active in an election.
cat("Voters registered in both snapshots (analysis sample):", nrow(analysis_df), "\n")
## Voters registered in both snapshots (analysis sample): 703129
cat("Voted in 2020:", sum(analysis_df$voted_2020), "\n")
## Voted in 2020: 495186
cat("Voted in 2022:", sum(analysis_df$voted_2022), "\n")
## Voted in 2022: 306081
# Turnout rates in each election, and the change between them, for
# redistricted (1) versus non-redistricted (0) voters.
turnout_summary <- analysis_df |>
  group_by(redistricted) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022),
    # Negative values mean turnout fell from 2020 to 2022.
    turnout_change = pct_voted_2022 - pct_voted_2020
  )

print(turnout_summary)
## # A tibble: 2 × 5
## redistricted n pct_voted_2020 pct_voted_2022 turnout_change
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 345488 0.685 0.404 -0.281
## 2 1 357641 0.723 0.466 -0.257
# Chi-squared test of independence on the 2x2 table of redistricting status
# (0/1) by 2022 turnout (0/1). This is an unadjusted comparison: it does not
# account for 2020 turnout.
chisq.test(table(analysis_df$redistricted, analysis_df$voted_2022))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(analysis_df$redistricted, analysis_df$voted_2022)
## X-squared = 2747.8, df = 1, p-value < 2.2e-16
# Logistic regression of 2022 turnout on redistricting status, with 2020
# turnout included as a covariate.
model <- glm(
  formula = voted_2022 ~ redistricted + voted_2020,
  family = binomial(link = "logit"),
  data = analysis_df
)

summary(model)
##
## Call:
## glm(formula = voted_2022 ~ redistricted + voted_2020, family = binomial(link = "logit"),
## data = analysis_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.921230 0.015337 -255.67 <2e-16 ***
## redistricted 0.229281 0.005728 40.03 <2e-16 ***
## voted_2020 4.245963 0.015313 277.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 962942 on 703128 degrees of freedom
## Residual deviance: 704748 on 703126 degrees of freedom
## AIC: 704754
##
## Number of Fisher Scoring iterations: 6
# Odds ratios with Wald 95% confidence limits: exponentiate the logit-scale
# coefficients and their confint.default() intervals.
cbind(OR = exp(coef(model)), exp(confint.default(model)))
## OR 2.5 % 97.5 %
## (Intercept) 0.01981671 0.01922988 0.02042145
## redistricted 1.25769546 1.24365455 1.27189489
## voted_2020 69.82295618 67.75852869 71.95028145
# Turnout rates for each specific district reassignment, restricted to
# redistricted voters and ordered from the largest group to the smallest.
change_summary <- analysis_df |>
  filter(redistricted == 1L) |>
  group_by(district_change) |>
  summarise(
    n = n(),
    pct_voted_2022 = mean(voted_2022),
    pct_voted_2020 = mean(voted_2020)
  ) |>
  arrange(desc(n))

print(change_summary)
## # A tibble: 3 × 4
## district_change n pct_voted_2022 pct_voted_2020
## <chr> <int> <dbl> <dbl>
## 1 12 → 14 212857 0.419 0.689
## 2 9 → 14 142986 0.534 0.771
## 3 9 → 12 1798 0.541 0.800
# Long-format turnout rates (one row per group and year) for the grouped
# bar chart.
turnout_long <- turnout_summary |>
  select(redistricted, pct_voted_2020, pct_voted_2022) |>
  pivot_longer(
    cols = !redistricted,
    names_to = "year",
    # Stripping the prefix leaves "2020" / "2022" as the year values.
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  ) |>
  mutate(
    voter_group = case_when(
      redistricted == 0 ~ "Not redistricted",
      redistricted == 1 ~ "Redistricted",
      .default = "Unknown"
    )
  ) |>
  # Exclude any rows that fell through to the "Unknown" label.
  filter(voter_group != "Unknown")
# Grouped bar chart of 2020 vs 2022 turnout for redistricted and
# non-redistricted voters, with the rate printed above each bar.
# The subtitle names the actual denominator: the analysis sample includes
# matched registrants who voted in neither election, so it is not limited to
# voters who were active in an election.
ggplot(turnout_long, aes(x = year, y = turnout_rate, fill = voter_group)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_text(
    aes(label = scales::percent(turnout_rate, accuracy = 0.1)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.6),
    vjust = -0.5,
    size = 3.5
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  scale_fill_manual(
    values = c("Not redistricted" = "#3B8BD4", "Redistricted" = "#E8593C")
  ) +
  labs(
    title = "Voter Turnout in 2020 vs 2022 by Redistricting Status",
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x = "Election Year",
    y = "Turnout Rate",
    fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )
# Turnout by specific reassignment, plus a comparison group of voters who
# were in District 12 in both years. Voters in neither category are dropped.
change_summary_with_stayers <- analysis_df |>
  mutate(
    group = case_when(
      cong_dist_2020 == 12 & cong_dist_2022 == 12 ~ "Stayed in District 12",
      redistricted == 1 ~ district_change,
      .default = NA_character_
    )
  ) |>
  filter(!is.na(group)) |>
  group_by(group) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  ) |>
  # Largest group first.
  arrange(desc(n))
# Long-format version of the per-group turnout table: one row per group and
# election year, for the line chart.
change_long <- change_summary_with_stayers |>
  pivot_longer(
    cols = c(pct_voted_2020, pct_voted_2022),
    names_to = "year",
    # Stripping the prefix leaves "2020" / "2022" as the year values.
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )
# Label positions for the line chart. Within each year the groups are ordered
# from highest to lowest turnout and assigned evenly spaced y positions (0.06
# apart) so the percentage labels do not overlap. The positions are generated
# from the number of rows instead of a fixed-length vector, so the code keeps
# working if the number of groups changes.
labels_2020 <- change_long |>
  filter(year == "2020") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 1.00, by = -0.06, length.out = n()))

labels_2022 <- change_long |>
  filter(year == "2022") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 0.72, by = -0.06, length.out = n()))
# Line chart of 2020 -> 2022 turnout for each district reassignment and for
# the District 12 stayers. Percentage labels are placed at the precomputed
# label_y positions and tied to their points with dotted leader lines.
# The subtitle names the actual denominator: the analysis sample includes
# matched registrants who voted in neither election, so it is not limited to
# voters who were active in an election.
ggplot(
  change_long,
  aes(x = year, y = turnout_rate, group = group, color = group)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  # Leader lines from the 2020 labels (left of the first year) to the points.
  geom_segment(
    data = labels_2020,
    aes(x = 0.65, xend = 0.78, y = label_y, yend = turnout_rate),
    linewidth = 0.3, linetype = "dotted"
  ) +
  # Leader lines from the 2022 points to the labels (right of the last year).
  geom_segment(
    data = labels_2022,
    aes(x = 2.22, xend = 2.35, y = turnout_rate, yend = label_y),
    linewidth = 0.3, linetype = "dotted"
  ) +
  geom_text(
    data = labels_2020,
    aes(
      x = 0.62, y = label_y,
      label = scales::percent(turnout_rate, accuracy = 0.1),
      color = group
    ),
    hjust = 1, size = 3.2
  ) +
  geom_text(
    data = labels_2022,
    aes(
      x = 2.38, y = label_y,
      label = scales::percent(turnout_rate, accuracy = 0.1),
      color = group
    ),
    hjust = 0, size = 3.2
  ) +
  # Extra horizontal expansion leaves room for the labels on both sides.
  scale_x_discrete(
    limits = c("2020", "2022"),
    expand = expansion(mult = 0.5)
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1.05)) +
  scale_color_manual(
    values = c(
      "12 → 14" = "#E8593C",
      "9 → 14" = "#3B8BD4",
      "9 → 12" = "#1D9E75",
      "Stayed in District 12" = "#8B5CF6"
    ),
    labels = c(
      "12 → 14" = "12 → 14 (redistricted)",
      "9 → 14" = "9 → 14 (redistricted)",
      "9 → 12" = "9 → 12 (redistricted)",
      "Stayed in District 12" = "Stayed in District 12 (baseline)"
    )
  ) +
  labs(
    title = "Turnout Change by Specific District Reassignment",
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x = "Election Year",
    y = "Turnout Rate",
    color = "Group"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top",
    legend.text = element_text(size = 10)
  )
# Overall turnout rate in each election across the whole analysis sample,
# reshaped to one row per year for plotting.
overall_turnout <- analysis_df |>
  summarise(
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  ) |>
  pivot_longer(
    cols = everything(),
    names_to = "year",
    # Stripping the prefix leaves "2020" / "2022" as the year values.
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )
# Bar chart of overall turnout in 2020 and 2022, with the rate printed above
# each bar.
# The subtitle names the actual denominator: the analysis sample includes
# matched registrants who voted in neither election, so it is not limited to
# voters who were active in an election.
ggplot(overall_turnout, aes(x = year, y = turnout_rate)) +
  geom_col(width = 0.5, fill = "#3B8BD4") +
  geom_text(
    aes(label = scales::percent(turnout_rate, accuracy = 0.1)),
    vjust = -0.5,
    size = 4.5
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  labs(
    title = "Overall Voter Turnout in Mecklenburg County",
    subtitle = "Among voters registered in both the 2020 and 2022 snapshots, 2020 vs. 2022",
    x = "Election Year",
    y = "Turnout Rate"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))
# Turnout rates for the two difference-in-differences groups: all
# redistricted voters versus voters who were in District 12 in both years.
# Voters in neither group are dropped.
did_summary <- analysis_df |>
  mutate(
    group = case_when(
      cong_dist_2020 == 12 & cong_dist_2022 == 12 ~ "Stayed in District 12",
      redistricted == 1 ~ "Redistricted (all)",
      .default = NA_character_
    )
  ) |>
  filter(!is.na(group)) |>
  group_by(group) |>
  summarise(
    n = n(),
    pct_voted_2020 = mean(voted_2020),
    pct_voted_2022 = mean(voted_2022)
  )
# Difference-in-differences estimate: the 2020 -> 2022 change in turnout for
# redistricted voters minus the same change for the District 12 stayers.
did_value <- did_summary |>
  select(group, pct_voted_2020, pct_voted_2022) |>
  # One row, with a column per (year, group) combination.
  pivot_wider(
    names_from = group,
    values_from = c(pct_voted_2020, pct_voted_2022)
  ) |>
  mutate(
    change_redistricted =
      .data[["pct_voted_2022_Redistricted (all)"]] -
      .data[["pct_voted_2020_Redistricted (all)"]],
    change_stayed =
      .data[["pct_voted_2022_Stayed in District 12"]] -
      .data[["pct_voted_2020_Stayed in District 12"]],
    diff_in_diff = change_redistricted - change_stayed
  )
# Long-format turnout rates for the two difference-in-differences groups:
# one row per group and election year.
did_long <- did_summary |>
  pivot_longer(
    cols = c(pct_voted_2020, pct_voted_2022),
    names_to = "year",
    # Stripping the prefix leaves "2020" / "2022" as the year values.
    names_prefix = "pct_voted_",
    values_to = "turnout_rate"
  )
# Label positions for the difference-in-differences chart. Within each year
# the groups are ordered from highest to lowest turnout and assigned evenly
# spaced y positions (0.06 apart). The positions are generated from the number
# of rows instead of a fixed-length vector, so the code keeps working if the
# number of groups changes.
did_labels_2020 <- did_long |>
  filter(year == "2020") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 0.80, by = -0.06, length.out = n()))

did_labels_2022 <- did_long |>
  filter(year == "2022") |>
  arrange(desc(turnout_rate)) |>
  mutate(label_y = seq(from = 0.58, by = -0.06, length.out = n()))
# Difference-in-differences chart: 2020 -> 2022 turnout for all redistricted
# voters versus the District 12 stayers, with the DiD estimate annotated.
# Two labelling fixes relative to the earlier version:
#   * The subtitle names the actual denominator: the analysis sample includes
#     matched registrants who voted in neither election.
#   * The DiD is a difference between turnout rates, so it is reported in
#     percentage points ("pp") rather than with a percent sign.
ggplot(
  did_long,
  aes(x = year, y = turnout_rate, group = group, color = group)
) +
  geom_line(linewidth = 1.4) +
  geom_point(size = 4) +
  # Leader lines from the 2020 labels (left of the first year) to the points.
  geom_segment(
    data = did_labels_2020,
    aes(x = 0.90, xend = 0.97, y = label_y, yend = turnout_rate),
    linewidth = 0.3, linetype = "dotted"
  ) +
  # Leader lines from the 2022 points to the labels (right of the last year).
  geom_segment(
    data = did_labels_2022,
    aes(x = 2.03, xend = 2.10, y = turnout_rate, yend = label_y),
    linewidth = 0.3, linetype = "dotted"
  ) +
  geom_text(
    data = did_labels_2020,
    aes(
      x = 0.88, y = label_y,
      label = scales::percent(turnout_rate, accuracy = 0.1),
      color = group
    ),
    hjust = 1, size = 4
  ) +
  geom_text(
    data = did_labels_2022,
    aes(
      x = 2.12, y = label_y,
      label = scales::percent(turnout_rate, accuracy = 0.1),
      color = group
    ),
    hjust = 0, size = 4
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 0.95,
    label = paste0(
      "DiD = ",
      # scale = 100 converts the proportion difference to percentage points.
      scales::number(
        did_value$diff_in_diff,
        accuracy = 0.1,
        scale = 100,
        suffix = " pp"
      )
    ),
    size = 4.5,
    hjust = 0.5,
    color = "black",
    fontface = "italic"
  ) +
  # Extra horizontal expansion leaves room for the labels on both sides.
  scale_x_discrete(
    limits = c("2020", "2022"),
    expand = expansion(mult = 0.4)
  ) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  scale_color_manual(
    values = c(
      "Redistricted (all)" = "#E8593C",
      "Stayed in District 12" = "#8B5CF6"
    )
  ) +
  labs(
    title = "Difference-in-Differences: Redistricted vs. Stayed in District 12",
    subtitle = "Mecklenburg County, NC — among voters registered in both the 2020 and 2022 snapshots",
    x = "Election Year",
    y = "Turnout Rate",
    color = "Group"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top",
    legend.text = element_text(size = 11)
  )