# Load necessary libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
# Note: If stargazer is not installed, you may need to run: install.packages("stargazer")
library(stargazer) # For nice regression tables
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Simulated inputs ----
# Party-level counts of women candidates. These counts set the size of the
# simulated candidate-level data below, so each number is written only once.
party_recruitment <- data.frame(
  Party = c("Democratic", "Republican"),
  Women_Candidates = c(322, 261)
)

# Fix the RNG state so every simulated draw that follows is reproducible.
set.seed(123)

# Per-party sample sizes, looked up from the party-level table rather than
# re-typed, so the two can never drift apart.
n_dem <- with(party_recruitment, Women_Candidates[Party == "Democratic"])
n_rep <- with(party_recruitment, Women_Candidates[Party == "Republican"])
stopifnot(length(n_dem) == 1L, length(n_rep) == 1L)

# Candidate-level data: Democratic rows first, then Republican rows.
# The random draws happen in argument order (incumbency, receipts, individual
# contributions, disbursements); reordering the columns would change the
# simulated values for a given seed.
campaign_data <- data.frame(
  Party = rep(c("Democratic", "Republican"), times = c(n_dem, n_rep)),
  Incumbency = sample(
    c("Incumbent", "Challenger", "Non-Incumbent"),
    size = n_dem + n_rep,
    replace = TRUE,
    prob = c(0.3, 0.4, 0.3)
  ),
  Log_Receipts = rnorm(n_dem + n_rep, mean = 10, sd = 2),
  Log_Individual_Contributions = rnorm(n_dem + n_rep, mean = 9, sd = 2),
  Log_Disbursements = rnorm(n_dem + n_rep, mean = 9.5, sd = 2)
)
# Indicator columns and their products ----
# Each indicator is 1 when the condition holds and 0 otherwise, so Democratic
# candidates and incumbents are the omitted reference categories.
campaign_data$Party_Republican <-
  as.numeric(campaign_data$Party == "Republican")
campaign_data$Incumbency_Challenger <-
  as.numeric(campaign_data$Incumbency == "Challenger")
campaign_data$Incumbency_NonIncumbent <-
  as.numeric(campaign_data$Incumbency == "Non-Incumbent")

# Party-by-incumbency interaction terms (product of the two indicators).
campaign_data$Republican_Challenger <-
  with(campaign_data, Party_Republican * Incumbency_Challenger)
campaign_data$Republican_NonIncumbent <-
  with(campaign_data, Party_Republican * Incumbency_NonIncumbent)

# Shift the Republican rows downward so Democrats have the higher financial
# metrics, mirroring the pattern of the original regression table.
# Multiplying the shift by the 0/1 party indicator leaves Democratic rows
# untouched.
campaign_data$Log_Receipts <-
  with(campaign_data, Log_Receipts - 6 * Party_Republican)
campaign_data$Log_Individual_Contributions <-
  with(campaign_data, Log_Individual_Contributions - 5 * Party_Republican)
campaign_data$Log_Disbursements <-
  with(campaign_data, Log_Disbursements - 6 * Party_Republican)
# District-level recruitment intensity (simulated) ----
# Every district receives one effort score per party; the Republican scores
# are drawn around a lower mean (5.5 versus 7).
n_districts <- 150 # assumed number of competitive districts
district_recruitment <- data.frame(
  District_ID = seq_len(n_districts),
  Dem_Recruitment_Effort = rnorm(n_districts, mean = 7, sd = 2),
  Rep_Recruitment_Effort = rnorm(n_districts, mean = 5.5, sd = 2)
)

# Place each candidate in a randomly chosen district. Sampling is with
# replacement, so a district can host several candidates.
campaign_data$District_ID <- sample(
  seq_len(n_districts),
  size = n_dem + n_rep,
  replace = TRUE
)

# Attach both parties' district scores to every candidate row.
campaign_data <- left_join(
  campaign_data,
  district_recruitment,
  by = "District_ID"
)

# Keep the score that belongs to the candidate's own party.
campaign_data$Recruitment_Effort <- with(
  campaign_data,
  ifelse(Party == "Democratic", Dem_Recruitment_Effort, Rep_Recruitment_Effort)
)
# Run the new models with recruitment effort added
# All three fits share the same right-hand side: the party indicator, the two
# incumbency indicators, their two products, and Recruitment_Effort. Because
# every indicator is 0 for Democratic incumbents, the intercept is the fitted
# value for that group when Recruitment_Effort is 0.
# Outcome 1: log of total receipts.
model_receipts <- lm(Log_Receipts ~ Party_Republican + Incumbency_Challenger +
Incumbency_NonIncumbent + Republican_Challenger +
Republican_NonIncumbent + Recruitment_Effort, data = campaign_data)
# Outcome 2: log of individual contributions.
model_contributions <- lm(Log_Individual_Contributions ~ Party_Republican +
Incumbency_Challenger + Incumbency_NonIncumbent +
Republican_Challenger + Republican_NonIncumbent +
Recruitment_Effort, data = campaign_data)
# Outcome 3: log of disbursements.
model_disbursements <- lm(Log_Disbursements ~ Party_Republican +
Incumbency_Challenger + Incumbency_NonIncumbent +
Republican_Challenger + Republican_NonIncumbent +
Recruitment_Effort, data = campaign_data)
# Create a single regression table with all three models
# stargazer() prints the plain-text table itself; its return value is not
# needed, so the whole expression is wrapped in invisible().
# If rendering fails, the handler reports why and lets the script continue,
# because a hand-built version of the same table is produced further down.
invisible(tryCatch(
  stargazer(
    model_receipts, model_contributions, model_disbursements,
    title = "Financial Support for Women Candidates by Party (2022 Congressional Primaries)",
    column.labels = c("Log Receipts", "Log Individual Contributions", "Log Disbursements"),
    # Blank dependent-variable labels: the column labels above already name
    # each outcome.
    dep.var.labels = c("", "", ""),
    # One label per coefficient, in model-term order, with the intercept last.
    covariate.labels = c(
      "Republican Party (ref: Democratic Party)", "Challenger", "Non-Incumbent",
      "Republican Party × Challenger", "Republican Party × Non-Incumbent",
      "Recruitment Effort", "Constant"
    ),
    type = "text",
    model.numbers = FALSE,
    single.row = FALSE,
    notes = "Note: Democratic Party is the reference category. Standard errors in parentheses."
  ),
  error = function(e) {
    message(
      "stargazer table could not be rendered (", conditionMessage(e),
      "); see the manually built table below."
    )
    NULL
  }
))
##
## Financial Support for Women Candidates by Party (2022 Congressional Primaries)
## ============================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------
##
## Log Receipts Log Individual Contributions Log Disbursements
## ----------------------------------------------------------------------------------------------------------------------------
## Republican Party (ref: Democratic Party) -6.265*** -5.045*** -5.745***
## (0.312) (0.313) (0.316)
##
## Challenger -0.210 0.075 -0.202
## (0.272) (0.273) (0.276)
##
## Non-Incumbent -0.094 -0.577** 0.129
## (0.288) (0.290) (0.293)
##
## Republican Party × Challenger 0.546 -0.155 0.113
## (0.401) (0.402) (0.406)
##
## Republican Party × Non-Incumbent 0.662 0.825* -0.380
## (0.435) (0.437) (0.441)
##
## Recruitment Effort 0.056 -0.005 0.017
## (0.042) (0.042) (0.043)
##
## Constant 9.570*** 9.099*** 9.271***
## (0.360) (0.361) (0.365)
##
## ----------------------------------------------------------------------------------------------------------------------------
## Observations 583 583 583
## R2 0.690 0.597 0.676
## Adjusted R2 0.687 0.593 0.672
## Residual Std. Error (df = 576) 1.995 2.004 2.024
## F Statistic (df = 6; 576) 213.530*** 142.275*** 199.992***
## ============================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Note: Democratic Party is the reference category. Standard errors in parentheses.
# Alternative display method if stargazer doesn't work
#' Build a side-by-side coefficient table for several fitted models
#'
#' Every cell has the form "estimate (std. error)stars", with both numbers
#' shown to three decimals. Stars follow the thresholds
#' `***` p < 0.001, `**` p < 0.01, `*` p < 0.05, `.` p < 0.1.
#'
#' @param models A list of fitted models whose `summary()` carries a
#'   `coefficients` matrix with the columns "Estimate", "Std. Error",
#'   "t value" and "Pr(>|t|)" (as `lm` fits do).
#' @param model_names Character vector of unique column headers, one per
#'   element of `models` and in the same order.
#' @return A wide table with a `Variable` column holding the raw term names
#'   and one character column per model. Terms keep their order of first
#'   appearance. With zero models, a table with only an empty `Variable`
#'   column is returned.
create_coef_table <- function(models, model_names) {
  if (length(models) != length(model_names)) {
    stop("`models` and `model_names` must have the same length", call. = FALSE)
  }
  if (anyDuplicated(model_names) > 0) {
    # Duplicate headers would collapse two models into a single column.
    stop("`model_names` must be unique", call. = FALSE)
  }
  if (length(models) == 0) {
    return(dplyr::tibble(Variable = character()))
  }

  # findInterval() returns 0 below the first cut-off, 1 between the first and
  # second, and so on, so adding 1 indexes straight into the star codes.
  star_cutoffs <- c(0.001, 0.01, 0.05, 0.1)
  star_codes <- c("***", "**", "*", ".", "")

  # One long-format data frame per model.
  coefs_list <- Map(
    function(model, model_name) {
      coef_data <- summary(model)$coefficients
      p_value <- coef_data[, "Pr(>|t|)"]
      data.frame(
        Variable = rownames(coef_data),
        Estimate = coef_data[, "Estimate"],
        SE = coef_data[, "Std. Error"],
        t_value = coef_data[, "t value"],
        p_value = p_value,
        Model = model_name,
        Significance = star_codes[findInterval(p_value, star_cutoffs) + 1L],
        row.names = NULL
      )
    },
    models,
    model_names
  )

  # unname() so that list names are never mistaken for rbind() arguments.
  coefs_df <- do.call(rbind, unname(coefs_list))

  coefs_df$Formatted <- sprintf(
    "%.3f (%.3f)%s",
    coefs_df$Estimate, coefs_df$SE, coefs_df$Significance
  )

  # Reshape to one row per term and one column per model.
  coefs_df %>%
    select(Variable, Model, Formatted) %>%
    tidyr::pivot_wider(names_from = Model, values_from = Formatted)
}
# Rename variables for manual table
#' Replace raw model term names with display labels
#'
#' @param variable_names Character vector of term names.
#' @return A vector of the same length in which each recognised term name is
#'   swapped for its display label; unrecognised entries pass through
#'   unchanged.
rename_variables <- function(variable_names) {
  display_labels <- c(
    "Party_Republican" = "Republican Party (ref: Democratic Party)",
    "Incumbency_Challenger" = "Challenger",
    "Incumbency_NonIncumbent" = "Non-Incumbent",
    "Republican_Challenger" = "Republican Party × Challenger",
    "Republican_NonIncumbent" = "Republican Party × Non-Incumbent",
    "Recruitment_Effort" = "Recruitment Effort",
    "(Intercept)" = "Constant"
  )

  # Position of each input in the lookup table; NA marks an unknown term.
  position <- match(variable_names, names(display_labels))
  known <- !is.na(position)

  relabelled <- variable_names
  relabelled[known] <- unname(display_labels[position[known]])
  relabelled
}
# Assemble the hand-built regression table ----
# Column headers, listed in the same order as the fitted models they describe.
model_names <- c(
  "Log Receipts",
  "Log Individual Contributions",
  "Log Disbursements"
)
models <- list(model_receipts, model_contributions, model_disbursements)

# One row per model term, one formatted column per model.
combined_table <- create_coef_table(models, model_names)

# Swap the raw term names for display labels.
combined_table[["Variable"]] <- rename_variables(combined_table[["Variable"]])
# Print the hand-built table between banner lines. Lines starting with "##"
# are the output recorded when the script was rendered; print() on a string
# shows it with a leading "[1]" and surrounding quotes.
print("=======================================================================")
## [1] "======================================================================="
print(" Financial Support for Women Candidates by Party (2022) ")
## [1] " Financial Support for Women Candidates by Party (2022) "
print("=======================================================================")
## [1] "======================================================================="
# Cells are "estimate (std. error)stars"; wide cells are truncated with an
# ellipsis in the recorded output below.
print(combined_table)
## # A tibble: 7 × 4
## Variable `Log Receipts` Log Individual Contr…¹ `Log Disbursements`
## <chr> <chr> <chr> <chr>
## 1 Constant 9.570 (0.360)… 9.099 (0.361)*** 9.271 (0.365)***
## 2 Republican Party (r… -6.265 (0.312… -5.045 (0.313)*** -5.745 (0.316)***
## 3 Challenger -0.210 (0.272) 0.075 (0.273) -0.202 (0.276)
## 4 Non-Incumbent -0.094 (0.288) -0.577 (0.290)* 0.129 (0.293)
## 5 Republican Party × … 0.546 (0.401) -0.155 (0.402) 0.113 (0.406)
## 6 Republican Party × … 0.662 (0.435) 0.825 (0.437). -0.380 (0.441)
## 7 Recruitment Effort 0.056 (0.042) -0.005 (0.042) 0.017 (0.043)
## # ℹ abbreviated name: ¹`Log Individual Contributions`
print("-----------------------------------------------------------------------")
## [1] "-----------------------------------------------------------------------"
print("Note: Democratic Party is the reference category")
## [1] "Note: Democratic Party is the reference category"
# NOTE(review): this legend matches the thresholds used by create_coef_table()
# (*** means p<0.001). The stargazer table printed earlier uses a different
# scale (*** means p<0.01), so the same coefficient can carry different stars
# in the two tables, e.g. Non-Incumbent under Log Individual Contributions.
print("Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1")
## [1] "Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1"
# Print model statistics

#' Format the fit statistics of one model as display text
#'
#' @param model A fitted model whose `summary()` provides `r.squared`,
#'   `adj.r.squared` and `fstatistic` (as `lm` fits do).
#' @return Character vector of length four: R-squared, adjusted R-squared,
#'   the F-statistic with its degrees of freedom, and the number of
#'   observations used in the fit.
format_fit_stats <- function(model) {
  fit <- summary(model) # computed once and reused for every statistic
  f_stat <- fit$fstatistic
  c(
    # sprintf() keeps trailing zeros ("0.690"); rounding and then coercing to
    # text alongside the F-statistic sentence would print "0.69".
    sprintf("%.3f", fit$r.squared),
    sprintf("%.3f", fit$adj.r.squared),
    paste(sprintf("%.2f", f_stat[1]), "on", f_stat[2], "and", f_stat[3], "DF"),
    # nobs() counts the rows the model actually used, which can be fewer than
    # nrow() of the data when rows with missing values are dropped.
    as.character(nobs(model))
  )
}

# One column per model. data.frame() converts the headers to syntactic names,
# so they print as Log.Receipts, Log.Individual.Contributions, and so on.
model_stats <- data.frame(
  Statistic = c("R-squared", "Adj. R-squared", "F-statistic", "Num. observations"),
  `Log Receipts` = format_fit_stats(model_receipts),
  `Log Individual Contributions` = format_fit_stats(model_contributions),
  `Log Disbursements` = format_fit_stats(model_disbursements)
)
print(model_stats)
## Statistic Log.Receipts Log.Individual.Contributions
## 1 R-squared 0.690 0.597
## 2 Adj. R-squared 0.687 0.593
## 3 F-statistic 213.53 on 6 and 576 DF 142.28 on 6 and 576 DF
## 4 Num. observations 583 583
## Log.Disbursements
## 1 0.676
## 2 0.672
## 3 199.99 on 6 and 576 DF
## 4 583
print("=======================================================================")
## [1] "======================================================================="