# Load necessary libraries ----
# One library() call per line: several calls on a single line without `;`
# separators is a syntax error in R.
# NOTE(review): broom and gridExtra are not referenced in the visible part of
# this script; they are kept in case later code relies on them.
library(tidyverse)
library(broom)
library(stargazer)
library(ggplot2)
library(gridExtra)

# Create a simulated dataset based on the regression coefficients provided ----

set.seed(123) # For reproducibility

# Number of observations for each party x incumbency category
n_dem_incumbent <- 100
n_dem_challenger <- 100
n_dem_open <- 50
n_rep_incumbent <- 80
n_rep_challenger <- 120
n_rep_open <- 69

# Party totals, used for both the id column and the party labels
n_democratic <- n_dem_incumbent + n_dem_challenger + n_dem_open
n_republican <- n_rep_incumbent + n_rep_challenger + n_rep_open

# Create the dataset structure first: one row per candidate, Democrats first,
# each party ordered Incumbent -> Challenger -> Non-Incumbent.
women_candidates <- data.frame(
  # Derived from the group sizes (519 with the values above) so the id column
  # cannot fall out of sync with the other columns if a size is changed.
  candidate_id = seq_len(n_democratic + n_republican),
  party = c(
    rep("Democratic", n_democratic),
    rep("Republican", n_republican)
  ),
  incumbency = c(
    rep("Incumbent", n_dem_incumbent),
    rep("Challenger", n_dem_challenger),
    rep("Non-Incumbent", n_dem_open),
    rep("Incumbent", n_rep_incumbent),
    rep("Challenger", n_rep_challenger),
    rep("Non-Incumbent", n_rep_open)
  ),
  # Initialize the financial variables as numeric NA; they are filled in
  # group by group further down the script.
  receipts = NA_real_,
  contributions = NA_real_,
  disbursements = NA_real_
)

# Function to generate values based on regression coefficients
#
# Draws `n` values on the log scale from a normal distribution whose mean is
# base_intercept + party_coef + incumbency_coef + interaction_coef and whose
# standard deviation is `sd`, then exponentiates them.
#
# Arguments:
#   n                 Number of values to draw.
#   party_coef        Party coefficient added to the log-scale mean.
#   incumbency_coef   Incumbency coefficient added to the log-scale mean.
#   interaction_coef  Party x incumbency interaction coefficient (default 0).
#   base_intercept    Intercept of the log-scale mean (default -6.067).
#   sd                Standard deviation of the log-scale error (default 5.324).
#
# Returns: a numeric vector of length `n` with the exponentiated draws.
generate_data <- function(n, party_coef, incumbency_coef,
                          interaction_coef = 0,
                          base_intercept = -6.067, sd = 5.324) {
  # Generate data with error term
  y_base <- base_intercept + party_coef + incumbency_coef + interaction_coef
  y <- rnorm(n, mean = y_base, sd = sd)

  # Exponentiate to get back from log scale
  exp(y)
}

# Identify row indices for each party x incumbency group.
# These index vectors are used below to fill the simulated financial columns
# one group at a time.
dem_inc_idx <- which(
  women_candidates$party == "Democratic" &
    women_candidates$incumbency == "Incumbent"
)
dem_chl_idx <- which(
  women_candidates$party == "Democratic" &
    women_candidates$incumbency == "Challenger"
)
dem_non_idx <- which(
  women_candidates$party == "Democratic" &
    women_candidates$incumbency == "Non-Incumbent"
)
rep_inc_idx <- which(
  women_candidates$party == "Republican" &
    women_candidates$incumbency == "Incumbent"
)
rep_chl_idx <- which(
  women_candidates$party == "Republican" &
    women_candidates$incumbency == "Challenger"
)
rep_non_idx <- which(
  women_candidates$party == "Republican" &
    women_candidates$incumbency == "Non-Incumbent"
)

# Generate values for receipts based on coefficients.
# Arguments after the group size are: party coefficient, incumbency
# coefficient and (Republican challenger / non-incumbent only) the interaction
# coefficient. The intercept and error sd are generate_data()'s defaults.
# The call order is kept fixed because each call consumes random numbers from
# the seeded generator.
women_candidates$receipts[dem_inc_idx] <-
  generate_data(length(dem_inc_idx), 0, 0)
women_candidates$receipts[dem_chl_idx] <-
  generate_data(length(dem_chl_idx), 0, -5.065)
women_candidates$receipts[dem_non_idx] <-
  generate_data(length(dem_non_idx), 0, 6.081)
women_candidates$receipts[rep_inc_idx] <-
  generate_data(length(rep_inc_idx), -7.268, 0)
women_candidates$receipts[rep_chl_idx] <-
  generate_data(length(rep_chl_idx), -7.268, -5.065, 5.455)
women_candidates$receipts[rep_non_idx] <-
  generate_data(length(rep_non_idx), -7.268, 6.081, 15.148)

# Generate values for individual contributions.
# Positional arguments are: group size, party coefficient, incumbency
# coefficient, interaction coefficient. The contributions model uses its own
# intercept (-5.746) and error sd (5.225), passed by name for readability.
# The call order is kept fixed because each call consumes random numbers from
# the seeded generator.
women_candidates$contributions[dem_inc_idx] <- generate_data(
  length(dem_inc_idx), 0, 0, 0,
  base_intercept = -5.746, sd = 5.225
)
women_candidates$contributions[dem_chl_idx] <- generate_data(
  length(dem_chl_idx), 0, -5.034, 0,
  base_intercept = -5.746, sd = 5.225
)
women_candidates$contributions[dem_non_idx] <- generate_data(
  length(dem_non_idx), 0, 5.447, 0,
  base_intercept = -5.746, sd = 5.225
)
women_candidates$contributions[rep_inc_idx] <- generate_data(
  length(rep_inc_idx), -7.329, 0, 0,
  base_intercept = -5.746, sd = 5.225
)
women_candidates$contributions[rep_chl_idx] <- generate_data(
  length(rep_chl_idx), -7.329, -5.034, 5.037,
  base_intercept = -5.746, sd = 5.225
)
women_candidates$contributions[rep_non_idx] <- generate_data(
  length(rep_non_idx), -7.329, 5.447, 14.466,
  base_intercept = -5.746, sd = 5.225
)

# Generate values for disbursements.
# Positional arguments are: group size, party coefficient, incumbency
# coefficient, interaction coefficient. The disbursements model uses its own
# intercept (-6.158) and error sd (5.315), passed by name for readability.
# The call order is kept fixed because each call consumes random numbers from
# the seeded generator.
women_candidates$disbursements[dem_inc_idx] <- generate_data(
  length(dem_inc_idx), 0, 0, 0,
  base_intercept = -6.158, sd = 5.315
)
women_candidates$disbursements[dem_chl_idx] <- generate_data(
  length(dem_chl_idx), 0, -5.235, 0,
  base_intercept = -6.158, sd = 5.315
)
women_candidates$disbursements[dem_non_idx] <- generate_data(
  length(dem_non_idx), 0, 6.089, 0,
  base_intercept = -6.158, sd = 5.315
)
women_candidates$disbursements[rep_inc_idx] <- generate_data(
  length(rep_inc_idx), -7.305, 0, 0,
  base_intercept = -6.158, sd = 5.315
)
women_candidates$disbursements[rep_chl_idx] <- generate_data(
  length(rep_chl_idx), -7.305, -5.235, 5.596,
  base_intercept = -6.158, sd = 5.315
)
women_candidates$disbursements[rep_non_idx] <- generate_data(
  length(rep_non_idx), -7.305, 6.089, 15.221,
  base_intercept = -6.158, sd = 5.315
)

# Create factor variables for regression.
# The first level of each factor is the reference category, so the model
# intercept corresponds to Democratic incumbents.
women_candidates$party_factor <- factor(
  women_candidates$party,
  levels = c("Democratic", "Republican")
)
women_candidates$incumbency_factor <- factor(
  women_candidates$incumbency,
  levels = c("Incumbent", "Challenger", "Non-Incumbent")
)

# Transform back to log scale for regression.
# The simulated amounts were produced by exponentiating normal draws, so the
# natural log recovers the scale on which the coefficients were specified.
women_candidates$log_receipts <- log(women_candidates$receipts)
women_candidates$log_contributions <- log(women_candidates$contributions)
women_candidates$log_disbursements <- log(women_candidates$disbursements)

# Check if we have any missing values.
# Each count should be 0 if every group index received simulated values.
sum(is.na(women_candidates$receipts))
sum(is.na(women_candidates$contributions))
sum(is.na(women_candidates$disbursements))

# Run the regression models.
# Each model regresses one logged financial measure on party, incumbency
# status and their interaction.
model1 <- lm(
  log_receipts ~ party_factor + incumbency_factor +
    party_factor:incumbency_factor,
  data = women_candidates
)
model2 <- lm(
  log_contributions ~ party_factor + incumbency_factor +
    party_factor:incumbency_factor,
  data = women_candidates
)
model3 <- lm(
  log_disbursements ~ party_factor + incumbency_factor +
    party_factor:incumbency_factor,
  data = women_candidates
)

# View the regression results (one summary per fitted model)
summary(model1)
summary(model2)
summary(model3)

# Create nicer regression table.
# covariate.labels are applied in the order the rows are printed. stargazer
# puts the intercept at the bottom by default, which would shift every label
# below by one row (the "Intercept" label would land on the party coefficient).
# Moving the intercept to the top makes the printed order match the labels.
stargazer(
  model1, model2, model3,
  type = "text",
  title = "Regression Results: Women Candidates Campaign Finance",
  dep.var.labels = c(
    "Log Receipts",
    "Log Individual Contributions",
    "Log Disbursements"
  ),
  covariate.labels = c(
    "Intercept (Democratic Incumbent)",
    "Party: Republican",
    "Incumbency: Challenger",
    "Incumbency: Non-Incumbent",
    "Republican × Challenger",
    "Republican × Non-Incumbent"
  ),
  intercept.bottom = FALSE,
  intercept.top = TRUE
)

# Create visualizations ----

# 1. Mean receipts by party and incumbency status
# The standard error is the group sd divided by the square root of group size.
receipts_summary <- women_candidates %>%
  group_by(party, incumbency) %>%
  summarize(
    mean_receipts = mean(receipts, na.rm = TRUE),
    se_receipts = sd(receipts, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  )

# Plot for receipts: dodged bars with +/- 1 SE error bars
plot1 <- ggplot(
  receipts_summary,
  aes(x = incumbency, y = mean_receipts, fill = party)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(
      ymin = mean_receipts - se_receipts,
      ymax = mean_receipts + se_receipts
    ),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Mean Campaign Receipts by Party and Incumbency Status",
    x = "Incumbency Status",
    y = "Mean Receipts ($)",
    fill = "Party"
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "bottom")

# 2. Mean contributions by party and incumbency status
# The standard error is the group sd divided by the square root of group size.
contributions_summary <- women_candidates %>%
  group_by(party, incumbency) %>%
  summarize(
    mean_contributions = mean(contributions, na.rm = TRUE),
    se_contributions = sd(contributions, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  )

# Plot for contributions: dodged bars with +/- 1 SE error bars
plot2 <- ggplot(
  contributions_summary,
  aes(x = incumbency, y = mean_contributions, fill = party)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(
      ymin = mean_contributions - se_contributions,
      ymax = mean_contributions + se_contributions
    ),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Mean Individual Contributions by Party and Incumbency Status",
    x = "Incumbency Status",
    y = "Mean Individual Contributions ($)",
    fill = "Party"
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "bottom")

# 3. Mean disbursements by party and incumbency status
# The standard error is the group sd divided by the square root of group size.
disbursements_summary <- women_candidates %>%
  group_by(party, incumbency) %>%
  summarize(
    mean_disbursements = mean(disbursements, na.rm = TRUE),
    se_disbursements = sd(disbursements, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  )

# Plot for disbursements: dodged bars with +/- 1 SE error bars
plot3 <- ggplot(
  disbursements_summary,
  aes(x = incumbency, y = mean_disbursements, fill = party)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(
      ymin = mean_disbursements - se_disbursements,
      ymax = mean_disbursements + se_disbursements
    ),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Mean Campaign Disbursements by Party and Incumbency Status",
    x = "Incumbency Status",
    y = "Mean Disbursements ($)",
    fill = "Party"
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "bottom")

# 4. Comparing the distribution of campaign finance metrics
# Reshape to one row per candidate x metric so the metrics can be faceted.
data_long <- women_candidates %>%
  pivot_longer(
    cols = c(receipts, contributions, disbursements),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("receipts", "contributions", "disbursements"),
      labels = c("Receipts", "Individual Contributions", "Disbursements")
    )
  )

# Boxplot for distributions: rows = incumbency status, columns = metric,
# drawn on a log10 y axis.
plot4 <- ggplot(data_long, aes(x = party, y = value, fill = party)) +
  geom_boxplot(alpha = 0.7) +
  facet_grid(incumbency ~ metric, scales = "free_y") +
  labs(
    title = "Distribution of Campaign Finance Metrics by Party and Incumbency",
    x = "Party",
    y = "Amount ($)",
    fill = "Party"
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma, trans = "log10") +
  theme(legend.position = "bottom")

# 5. Correlation between receipts and contributions
# Scatter plot with one linear fit per party x incumbency group.
plot5 <- ggplot(
  women_candidates,
  aes(x = contributions, y = receipts, color = party, shape = incumbency)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(
    method = "lm",
    aes(group = interaction(party, incumbency)),
    se = FALSE
  ) +
  labs(
    title = "Relationship Between Individual Contributions and Total Receipts",
    x = "Individual Contributions ($)",
    y = "Total Receipts ($)",
    color = "Party",
    shape = "Incumbency Status"
  ) +
  scale_color_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = "bottom")

# Display the plots (explicit print() so they render when the script is
# source()d, not only when run line by line)
print(plot1)
print(plot2)
print(plot3)
print(plot4)
print(plot5)

# Additional Analysis - Ratio of contributions to receipts ----

# Calculate the ratio.
# NOTE(review): contributions and receipts are simulated by separate random
# draws, so this ratio is not constrained to lie between 0 and 1 - confirm
# that is acceptable before reading it as a "proportion of receipts".
women_candidates$contribution_ratio <-
  women_candidates$contributions / women_candidates$receipts

# Summarize the ratio by party and incumbency
ratio_summary <- women_candidates %>%
  group_by(party, incumbency) %>%
  summarize(
    mean_ratio = mean(contribution_ratio, na.rm = TRUE),
    median_ratio = median(contribution_ratio, na.rm = TRUE),
    sd_ratio = sd(contribution_ratio, na.rm = TRUE),
    se_ratio = sd(contribution_ratio, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  )

# Plot the ratio: dodged bars of the group mean with +/- 1 SE error bars,
# y axis formatted as a percentage
plot6 <- ggplot(
  ratio_summary,
  aes(x = incumbency, y = mean_ratio, fill = party)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean_ratio - se_ratio, ymax = mean_ratio + se_ratio),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Individual Contributions as a Proportion of Total Receipts",
    subtitle = "By Party and Incumbency Status",
    x = "Incumbency Status",
    y = "Mean Contribution Ratio",
    fill = "Party"
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position = "bottom")

print(plot6)

# Summary statistics by party: group size plus mean and median of each
# financial measure
party_summary <- women_candidates %>%
  group_by(party) %>%
  summarize(
    n = n(),
    mean_receipts = mean(receipts, na.rm = TRUE),
    median_receipts = median(receipts, na.rm = TRUE),
    mean_contrib = mean(contributions, na.rm = TRUE),
    median_contrib = median(contributions, na.rm = TRUE),
    mean_disbursements = mean(disbursements, na.rm = TRUE),
    median_disbursements = median(disbursements, na.rm = TRUE),
    .groups = "drop"
  )

print(party_summary)

# Summary statistics by incumbency: group size plus mean and median of each
# financial measure
incumbency_summary <- women_candidates %>%
  group_by(incumbency) %>%
  summarize(
    n = n(),
    mean_receipts = mean(receipts, na.rm = TRUE),
    median_receipts = median(receipts, na.rm = TRUE),
    mean_contrib = mean(contributions, na.rm = TRUE),
    median_contrib = median(contributions, na.rm = TRUE),
    mean_disbursements = mean(disbursements, na.rm = TRUE),
    median_disbursements = median(disbursements, na.rm = TRUE),
    .groups = "drop"
  )

print(incumbency_summary)

# Combined summary: the same statistics for every party x incumbency cell
combined_summary <- women_candidates %>%
  group_by(party, incumbency) %>%
  summarize(
    n = n(),
    mean_receipts = mean(receipts, na.rm = TRUE),
    median_receipts = median(receipts, na.rm = TRUE),
    mean_contrib = mean(contributions, na.rm = TRUE),
    median_contrib = median(contributions, na.rm = TRUE),
    mean_disbursements = mean(disbursements, na.rm = TRUE),
    median_disbursements = median(disbursements, na.rm = TRUE),
    .groups = "drop"
  )

print(combined_summary)

# Interpretation of the results ----
# writeLines() prints each element on its own line. The previous back-to-back
# cat() calls had no newline terminators, so all findings ran together on a
# single line.
#
# NOTE(review): these statements are hard-coded text, not computed from the
# fitted models. At least finding 1 looks inconsistent with the simulation
# coefficients used for receipts: the log-scale mean for Republican
# non-incumbents (-6.067 - 7.268 + 6.081 + 15.148 = 7.894) is far above the
# one for Democratic non-incumbents (-6.067 + 6.081 = 0.014). Verify every
# claim against the model output before reporting it.
writeLines(c(
  "— Key Findings from Women Candidates Campaign Finance Analysis —",
  "",
  "1. Party Differences:",
  " - Democratic women candidates have higher mean receipts compared to Republican women across all incumbency types.",
  " - The gap is especially pronounced for challengers and non-incumbents.",
  "",
  "2. Incumbency Effects:",
  " - For both parties, incumbents generally have higher receipts than challengers.",
  " - Non-incumbent candidates (open seats) show large variability in fundraising success.",
  "",
  "3. Interaction Effects:",
  " - Republican women in non-incumbent races show a distinctive pattern in their fundraising compared to Democratic women.",
  " - The interaction between party and incumbency status is statistically significant, indicating that the effect of incumbency differs by party.",
  "",
  "4. Individual Contributions:",
  " - Democratic women generally receive a higher proportion of their funds from individual contributors compared to Republican women.",
  " - Challenger candidates are more reliant on individual contributions than incumbents.",
  "",
  "5. Disbursements:",
  " - Spending patterns closely follow receipt patterns, suggesting candidates spend in proportion to what they raise.",
  " - There are no significant differences in spending efficiency between parties.",
  "",
  "These findings suggest important differences in campaign finance patterns between Democratic and Republican women candidates, with implications for understanding gender and party dynamics in electoral politics."
))