# Load necessary libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
# Note: If stargazer is not installed, you may need to run: install.packages("stargazer")
library(stargazer)  # For nice regression tables
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Create dataset based on information provided
# Party-level recruitment data: number of women candidates per party.
party_recruitment <- data.frame(
  Party = c("Democratic", "Republican"),
  Women_Candidates = c(322, 261)
)

# Now create a simulated dataset at the candidate level.
# The per-party counts are read from party_recruitment so that the two tables
# share a single source of truth instead of repeating the literals.
set.seed(123)
n_dem <- party_recruitment$Women_Candidates[
  party_recruitment$Party == "Democratic"
]
n_rep <- party_recruitment$Women_Candidates[
  party_recruitment$Party == "Republican"
]
n_candidates <- n_dem + n_rep

# Create candidate-level dataset: Democrats first, then Republicans.
# The random columns are drawn in the order written here (Incumbency, then the
# three log-scale financial variables); reordering them changes the simulated
# values obtained from the seed above.
campaign_data <- data.frame(
  Party = rep(c("Democratic", "Republican"), times = c(n_dem, n_rep)),
  Incumbency = sample(
    c("Incumbent", "Challenger", "Non-Incumbent"),
    n_candidates,
    replace = TRUE,
    prob = c(0.3, 0.4, 0.3)
  ),
  Log_Receipts = rnorm(n_candidates, mean = 10, sd = 2),
  Log_Individual_Contributions = rnorm(n_candidates, mean = 9, sd = 2),
  Log_Disbursements = rnorm(n_candidates, mean = 9.5, sd = 2)
)

# Build 0/1 indicator columns for party and incumbency status, then the
# party-by-incumbency interaction columns as products of those indicators.
# "Democratic" and "Incumbent" get no indicator of their own.
campaign_data[["Party_Republican"]] <-
  as.numeric(campaign_data[["Party"]] == "Republican")
campaign_data[["Incumbency_Challenger"]] <-
  as.numeric(campaign_data[["Incumbency"]] == "Challenger")
campaign_data[["Incumbency_NonIncumbent"]] <-
  as.numeric(campaign_data[["Incumbency"]] == "Non-Incumbent")
campaign_data[["Republican_Challenger"]] <-
  with(campaign_data, Party_Republican * Incumbency_Challenger)
campaign_data[["Republican_NonIncumbent"]] <-
  with(campaign_data, Party_Republican * Incumbency_NonIncumbent)

# Shift the simulated financial variables down for Republican candidates so
# that Democrats end up with the higher values. The comparison yields
# TRUE/FALSE, which acts as 1/0 in arithmetic, so the offset is subtracted
# only on Republican rows and all other rows are left untouched.
campaign_data$Log_Receipts <- with(
  campaign_data,
  Log_Receipts - 6 * (Party == "Republican")
)

campaign_data$Log_Individual_Contributions <- with(
  campaign_data,
  Log_Individual_Contributions - 5 * (Party == "Republican")
)

campaign_data$Log_Disbursements <- with(
  campaign_data,
  Log_Disbursements - 6 * (Party == "Republican")
)

# Simulate a recruitment-effort measure at the district level, with one value
# per party in each district, and attach the matching value to every candidate.
n_districts <- 150  # Assuming approximately 150 competitive districts
district_recruitment <- data.frame(
  District_ID = seq_len(n_districts),
  Dem_Recruitment_Effort = rnorm(n_districts, mean = 7, sd = 2),
  # Republican efforts are drawn around a lower mean than Democratic ones
  Rep_Recruitment_Effort = rnorm(n_districts, mean = 5.5, sd = 2)
)

# Draw a district for each candidate with replacement, so a district can hold
# several candidates (and some districts may hold none).
campaign_data$District_ID <- sample.int(
  n_districts,
  size = n_dem + n_rep,
  replace = TRUE
)

# Bring both party-specific effort columns onto the candidate rows
campaign_data <- left_join(
  campaign_data,
  district_recruitment,
  by = "District_ID"
)

# Keep the effort value that belongs to the candidate's own party
campaign_data$Recruitment_Effort <- with(
  campaign_data,
  ifelse(
    Party != "Democratic",
    Rep_Recruitment_Effort,
    Dem_Recruitment_Effort
  )
)

# Fit the three outcome models with recruitment effort added.
# The receipts model spells out the shared right-hand side once; the other two
# are derived with update(), where `~ .` keeps every predictor and only the
# response changes, so the three specifications stay identical.
model_receipts <- lm(
  Log_Receipts ~ Party_Republican + Incumbency_Challenger +
    Incumbency_NonIncumbent + Republican_Challenger +
    Republican_NonIncumbent + Recruitment_Effort,
  data = campaign_data
)

model_contributions <- update(model_receipts, Log_Individual_Contributions ~ .)

model_disbursements <- update(model_receipts, Log_Disbursements ~ .)

# Create a single regression table with all three models.
# The call is wrapped in tryCatch() rather than a bare try(): if stargazer
# fails, the underlying error message is reported as a warning and the script
# carries on to the manually built table further down.
tryCatch(
  stargazer(model_receipts, model_contributions, model_disbursements,
            title = "Financial Support for Women Candidates by Party (2022 Congressional Primaries)",
            column.labels = c("Log Receipts", "Log Individual Contributions", "Log Disbursements"),
            # Blank out the dependent-variable labels; column.labels name the models
            dep.var.labels = c("", "", ""),
            # One label per coefficient, in model-term order, intercept last
            covariate.labels = c("Republican Party (ref: Democratic Party)", "Challenger", "Non-Incumbent", 
                                 "Republican Party × Challenger", "Republican Party × Non-Incumbent", 
                                 "Recruitment Effort", "Constant"),
            type = "text",
            model.numbers = FALSE,
            single.row = FALSE,
            notes = "Note: Democratic Party is the reference category. Standard errors in parentheses."),
  error = function(e) {
    warning(
      "stargazer table could not be rendered: ", conditionMessage(e),
      call. = FALSE
    )
    invisible(NULL)
  }
)
## 
## Financial Support for Women Candidates by Party (2022 Congressional Primaries)
## ============================================================================================================================
##                                                                          Dependent variable:                                
##                                          -----------------------------------------------------------------------------------
##                                                                                                                             
##                                                 Log Receipts         Log Individual Contributions     Log Disbursements     
## ----------------------------------------------------------------------------------------------------------------------------
## Republican Party (ref: Democratic Party)          -6.265***                   -5.045***                   -5.745***         
##                                                    (0.312)                     (0.313)                     (0.316)          
##                                                                                                                             
## Challenger                                         -0.210                       0.075                       -0.202          
##                                                    (0.272)                     (0.273)                     (0.276)          
##                                                                                                                             
## Non-Incumbent                                      -0.094                      -0.577**                     0.129           
##                                                    (0.288)                     (0.290)                     (0.293)          
##                                                                                                                             
## Republican Party × Challenger                       0.546                       -0.155                      0.113           
##                                                    (0.401)                     (0.402)                     (0.406)          
##                                                                                                                             
## Republican Party × Non-Incumbent                    0.662                       0.825*                      -0.380          
##                                                    (0.435)                     (0.437)                     (0.441)          
##                                                                                                                             
## Recruitment Effort                                  0.056                       -0.005                      0.017           
##                                                    (0.042)                     (0.042)                     (0.043)          
##                                                                                                                             
## Constant                                          9.570***                     9.099***                    9.271***         
##                                                    (0.360)                     (0.361)                     (0.365)          
##                                                                                                                             
## ----------------------------------------------------------------------------------------------------------------------------
## Observations                                         583                         583                         583            
## R2                                                  0.690                       0.597                       0.676           
## Adjusted R2                                         0.687                       0.593                       0.672           
## Residual Std. Error (df = 576)                      1.995                       2.004                       2.024           
## F Statistic (df = 6; 576)                        213.530***                   142.275***                  199.992***        
## ============================================================================================================================
## Note:                                                                                            *p<0.1; **p<0.05; ***p<0.01
##                                            Note: Democratic Party is the reference category. Standard errors in parentheses.
# Alternative display method if stargazer doesn't work
# Build a wide coefficient table with one row per model term and one column
# per model. Each cell reads "estimate (std. error)" followed by a
# significance marker.
#
# Args:
#   models:      Non-empty list of fitted models whose summary() carries a
#                `coefficients` matrix with "Estimate", "Std. Error" and
#                "Pr(>|t|)" columns (as summary.lm() produces).
#   model_names: Character vector with one unique label per model, in the same
#                order as `models`; the labels become the column names.
#
# Returns:
#   The result of tidyr::pivot_wider(): a `Variable` column holding the raw
#   term names plus one character column per model. A term missing from a
#   model is NA in that model's column.
#
# Significance markers: "***" p < 0.001, "**" p < 0.01, "*" p < 0.05,
# "." p < 0.1, "" otherwise.
create_coef_table <- function(models, model_names) {
  if (length(models) == 0L) {
    stop("`models` must contain at least one fitted model.", call. = FALSE)
  }
  if (length(models) != length(model_names)) {
    stop("`models` and `model_names` must have the same length.", call. = FALSE)
  }
  if (anyDuplicated(model_names) > 0L) {
    # Duplicate labels would collide as column names when pivoting wide
    stop("`model_names` must be unique.", call. = FALSE)
  }

  # findInterval() returns how many cutoffs are <= p, so p < 0.001 maps to
  # position 1 ("***") and p >= 0.1 maps to position 5 ("").
  star_cutoffs <- c(0.001, 0.01, 0.05, 0.1)
  star_labels <- c("***", "**", "*", ".", "")

  # One long-format data frame per model: term, model label, formatted cell
  coefs_list <- lapply(seq_along(models), function(i) {
    coef_data <- summary(models[[i]])$coefficients
    stars <- star_labels[findInterval(coef_data[, "Pr(>|t|)"], star_cutoffs) + 1L]

    data.frame(
      Variable = rownames(coef_data),
      Model = model_names[i],
      Formatted = paste0(
        sprintf("%.3f", coef_data[, "Estimate"]),
        " (", sprintf("%.3f", coef_data[, "Std. Error"]), ")",
        stars
      ),
      row.names = NULL
    )
  })

  # Stack the per-model frames, then spread the models into columns
  coefs_df <- do.call(rbind, coefs_list)

  tidyr::pivot_wider(coefs_df, names_from = Model, values_from = Formatted)
}

# Translate raw model-term names into the display labels used in the manual
# table. Names without an entry in the lookup are returned unchanged.
#
# Args:
#   variable_names: Character vector of term names (e.g. "(Intercept)").
#
# Returns:
#   A character vector of the same length holding the display labels.
rename_variables <- function(variable_names) {
  display_labels <- c(
    "Party_Republican" = "Republican Party (ref: Democratic Party)",
    "Incumbency_Challenger" = "Challenger",
    "Incumbency_NonIncumbent" = "Non-Incumbent",
    "Republican_Challenger" = "Republican Party × Challenger",
    "Republican_NonIncumbent" = "Republican Party × Non-Incumbent",
    "Recruitment_Effort" = "Recruitment Effort",
    "(Intercept)" = "Constant"
  )

  new_names <- variable_names
  # Position of each input in the lookup; NA marks a name with no label
  label_pos <- match(new_names, names(display_labels))
  has_label <- !is.na(label_pos)
  new_names[has_label] <- unname(display_labels[label_pos[has_label]])
  return(new_names)
}

# Print the combined regression table
models <- list(model_receipts, model_contributions, model_disbursements)
model_names <- c("Log Receipts", "Log Individual Contributions", "Log Disbursements")

# One row per coefficient, one column per model; then swap the raw term names
# for their display labels.
combined_table <- create_coef_table(models, model_names)
combined_table$Variable <- rename_variables(combined_table$Variable)

# The frame around the table is written with writeLines(), which emits the
# text verbatim; print() would wrap every banner line in `[1] "..."`.
writeLines(c(
  "=======================================================================",
  "      Financial Support for Women Candidates by Party (2022)           ",
  "======================================================================="
))
## =======================================================================
##       Financial Support for Women Candidates by Party (2022)           
## =======================================================================
print(combined_table)
## # A tibble: 7 × 4
##   Variable             `Log Receipts` Log Individual Contr…¹ `Log Disbursements`
##   <chr>                <chr>          <chr>                  <chr>              
## 1 Constant             9.570 (0.360)… 9.099 (0.361)***       9.271 (0.365)***   
## 2 Republican Party (r… -6.265 (0.312… -5.045 (0.313)***      -5.745 (0.316)***  
## 3 Challenger           -0.210 (0.272) 0.075 (0.273)          -0.202 (0.276)     
## 4 Non-Incumbent        -0.094 (0.288) -0.577 (0.290)*        0.129 (0.293)      
## 5 Republican Party × … 0.546 (0.401)  -0.155 (0.402)         0.113 (0.406)      
## 6 Republican Party × … 0.662 (0.435)  0.825 (0.437).         -0.380 (0.441)     
## 7 Recruitment Effort   0.056 (0.042)  -0.005 (0.042)         0.017 (0.043)      
## # ℹ abbreviated name: ¹​`Log Individual Contributions`
writeLines(c(
  "-----------------------------------------------------------------------",
  "Note: Democratic Party is the reference category",
  "Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1"
))
## -----------------------------------------------------------------------
## Note: Democratic Party is the reference category
## Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1
# Print model statistics

# Collect the fit statistics shown for one model as a character vector, in the
# row order of the table below: R-squared, adjusted R-squared, F-statistic
# with its degrees of freedom, and the number of observations used in the fit.
fit_statistics <- function(model) {
  fit <- summary(model)
  f_stat <- fit$fstatistic  # value, numerator df, denominator df
  c(
    round(fit$r.squared, 3),
    round(fit$adj.r.squared, 3),
    paste(round(f_stat[1], 2), "on", f_stat[2], "and", f_stat[3], "DF"),
    # nobs() counts the rows the model actually used, which can be fewer than
    # nrow() of the data if lm() dropped incomplete rows
    nobs(model)
  )
}

# check.names = FALSE keeps the column names with spaces as written;
# otherwise data.frame() rewrites them to Log.Receipts and so on.
model_stats <- data.frame(
  Statistic = c("R-squared", "Adj. R-squared", "F-statistic", "Num. observations"),
  `Log Receipts` = fit_statistics(model_receipts),
  `Log Individual Contributions` = fit_statistics(model_contributions),
  `Log Disbursements` = fit_statistics(model_disbursements),
  check.names = FALSE
)
print(model_stats)
##           Statistic           Log Receipts Log Individual Contributions
## 1         R-squared                   0.69                        0.597
## 2    Adj. R-squared                  0.687                        0.593
## 3       F-statistic 213.53 on 6 and 576 DF       142.28 on 6 and 576 DF
## 4 Num. observations                    583                          583
##        Log Disbursements
## 1                  0.676
## 2                  0.672
## 3 199.99 on 6 and 576 DF
## 4                    583
# Closing rule, written verbatim (no `[1] "..."` wrapper)
writeLines("=======================================================================")
## =======================================================================