# women-candidates-analysis.r

# R CODE FOR WOMEN'S ISSUES PACS ANALYSIS

# Load required libraries
library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.4.3

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'tibble' was built under R version 4.4.3

## Warning: package 'tidyr' was built under R version 4.4.3

## Warning: package 'readr' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.3

## Warning: package 'dplyr' was built under R version 4.4.3

## Warning: package 'stringr' was built under R version 4.4.3

## Warning: package 'forcats' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(car)       # For VIF calculations

## Warning: package 'car' was built under R version 4.4.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(broom)     # For tidy model outputs

## Warning: package 'broom' was built under R version 4.4.3

library(corrplot)  # For correlation plots

## Warning: package 'corrplot' was built under R version 4.4.3

## corrplot 0.95 loaded

# Hand-entered PAC table. Every vector below holds one entry per PAC, in the
# same order (25 PACs); dollar amounts are still formatted strings here.
pacs_data <- data.frame(
  PAC_Name = c(
    "Value in Electing Women PAC",
    "EMILY's List",
    "Tri-State Maxed Out Women",
    "Maggie's List",
    "Winning for Women",
    "Women for American Values & Ethics",
    "Electing Women Bay Area",
    "Vote Mama",
    "RightNow Women",
    "National Women's Political Caucus",
    "Los Angeles Women's Giving Collective",
    "WomenWinning",
    "Electing Women PAC",
    "Moms in Office",
    "Her Bold Move Action",
    "Progressive Women's Alliance of Western Michigan",
    "Santa Barbara Women's PAC",
    "Women United for Florida Families",
    "Electing Women Greater Seattle PAC",
    "Run Sister Run PAC",
    "Matriarch PAC",
    "Alaska March On",
    "Ohio Women's Caucus",
    "Persistence PAC",
    "Womencount PAC"
  ),
  Affiliate = c(
    "REP", "DEM", "DEM", "REP", "REP",
    "DEM", "DEM", "DEM", "REP", "DEM",
    "DEM", "DEM", "DEM", "DEM", "DEM",
    "DEM", "DEM", "DEM", "DEM", "DEM",
    "DEM", "DEM", "DEM", "DEM", "DEM"
  ),
  Total = c(
    "$558,831", "$480,025", "$397,750", "$106,825", "$71,040",
    "$59,944", "$57,302", "$45,540", "$36,500", "$33,000",
    "$30,300", "$23,500", "$17,942", "$10,750", "$10,250",
    "$5,000", "$4,250", "$4,005", "$2,806", "$2,500",
    "$2,000", "$1,804", "$1,500", "$1,000", "$341"
  ),
  To_Democrats = c(
    "$0", "$475,025", "$392,750", "$0", "$0",
    "$59,944", "$57,302", "$45,540", "$0", "$33,000",
    "$30,300", "$23,500", "$17,942", "$10,250", "$9,750",
    "$5,000", "$3,750", "$4,005", "$2,806", "$2,500",
    "$2,000", "$1,804", "$1,500", "$1,000", "$341"
  ),
  To_Republicans = c(
    "$558,831", "$0", "$0", "$106,825", "$71,040",
    "$0", "$0", "$0", "$36,500", "$0",
    "$0", "$0", "$0", "$0", "$0",
    "$0", "$500", "$0", "$0", "$0",
    "$0", "$0", "$0", "$0", "$0"
  )
)

# In this table the Lean label follows Affiliate one-for-one (REP PACs lean
# solidly Republican, all others solidly Democrat), so it is filled in from
# Affiliate and appended as the last column.
pacs_data$Lean <- ifelse(
  pacs_data$Affiliate == "REP",
  "Solidly Republican/Conservative",
  "Solidly Democrat/Liberal"
)

# Turn the three dollar columns into numbers: drop every "$" and "," from the
# formatted strings, then convert what remains with as.numeric().
pacs_data <- pacs_data %>%
  mutate(
    across(
      c(Total, To_Democrats, To_Republicans),
      \(amount) as.numeric(gsub("[$,]", "", amount))
    )
  )

# Create additional variables for analysis (all derived from the cleaned
# dollar columns and Affiliate).
pacs_data <- pacs_data %>%
  mutate(
    # Party indicator: 1 for DEM-affiliated PACs, 0 for every other value
    Party_Binary = ifelse(Affiliate == "DEM", 1, 0),

    # Share of each PAC's total that went to each party, in percent
    # (NaN when Total is 0)
    Pct_To_Democrats = To_Democrats / Total * 100,
    Pct_To_Republicans = To_Republicans / Total * 100,

    # Size bucket by total contributions:
    # below 10,000 = Small, below 100,000 = Medium, everything else = Large
    PAC_Size = case_when(
      Total < 10000 ~ "Small",
      Total < 100000 ~ "Medium",
      TRUE ~ "Large"
    ),

    # 1 when the whole total went to a single party, 0 otherwise.
    # The dollar amounts are compared directly instead of testing the derived
    # percentage with `== 100`, so the flag does not hinge on exact
    # floating-point equality. A PAC with a zero total has no defined split
    # and stays NA, exactly as the percentage-based test produced.
    Exclusively_Partisan = ifelse(
      Total == 0,
      NA_real_,
      as.numeric(To_Democrats == Total | To_Republicans == Total)
    ),

    # 1 when the PAC gave a positive amount to both parties, 0 otherwise
    Crossover_Support = ifelse(To_Democrats > 0 & To_Republicans > 0, 1, 0)
  )

# DESCRIPTIVE STATISTICS BY PARTY
# One row per Affiliate: PAC count plus sum, mean, median, max, min and
# standard deviation of Total (missing totals are ignored).
party_summary <- pacs_data |>
  group_by(Affiliate) |>
  summarise(
    Number_of_PACs      = n(),
    Total_Contributions = sum(Total, na.rm = TRUE),
    Mean_Contribution   = mean(Total, na.rm = TRUE),
    Median_Contribution = median(Total, na.rm = TRUE),
    Max_Contribution    = max(Total, na.rm = TRUE),
    Min_Contribution    = min(Total, na.rm = TRUE),
    SD_Contribution     = sd(Total, na.rm = TRUE)
  )

print(party_summary)

## # A tibble: 2 × 8
##   Affiliate Number_of_PACs Total_Contributions Mean_Contribution
##   <chr>              <int>               <dbl>             <dbl>
## 1 DEM                   21             1191509            56739.
## 2 REP                    4              773196           193299 
## # ℹ 4 more variables: Median_Contribution <dbl>, Max_Contribution <dbl>,
## #   Min_Contribution <dbl>, SD_Contribution <dbl>

# Per-candidate support by party.
# The candidate counts below are simulated placeholders (no actual candidate
# counts are available), so the per-candidate figures are illustrative only.
dem_candidates <- 150  # Simulated number of Democratic women candidates
rep_candidates <- 45   # Simulated number of Republican women candidates

# Dollars received by each party's candidates, summed over all PACs, next to
# the (simulated) number of candidates.
per_candidate_support <- data.frame(
  Party = c("Democratic", "Republican"),
  Total_Support = with(pacs_data, c(sum(To_Democrats), sum(To_Republicans))),
  Candidate_Count = c(dem_candidates, rep_candidates)
)

# Average dollars per candidate, appended as the last column.
per_candidate_support$Support_Per_Candidate <-
  per_candidate_support$Total_Support / per_candidate_support$Candidate_Count

print(per_candidate_support)

##        Party Total_Support Candidate_Count Support_Per_Candidate
## 1 Democratic       1180009             150              7866.727
## 2 Republican        773696              45             17193.244

# MULTICOLLINEARITY ASSESSMENT
# Keep only the numeric columns that enter the correlation analysis.
model_data <- pacs_data[
  c(
    "Total",
    "To_Democrats",
    "To_Republicans",
    "Party_Binary",
    "Exclusively_Partisan"
  )
]

# Pairwise correlations, computed on rows with no missing values.
cor_matrix <- cor(model_data, use = "complete.obs")
print(cor_matrix)

##                           Total To_Democrats To_Republicans Party_Binary
## Total                 1.0000000    0.6895891      0.6410149   -0.3296974
## To_Democrats          0.6895891    1.0000000     -0.1138047    0.1776076
## To_Republicans        0.6410149   -0.1138047      1.0000000   -0.6403047
## Party_Binary         -0.3296974    0.1776076     -0.6403047    1.0000000
## Exclusively_Partisan -0.3359175   -0.5651765      0.1393817   -0.2182179
##                      Exclusively_Partisan
## Total                          -0.3359175
## To_Democrats                   -0.5651765
## To_Republicans                  0.1393817
## Party_Binary                   -0.2182179
## Exclusively_Partisan            1.0000000

# Plot the correlation matrix: upper triangle only, drawn as circles, with
# black variable labels rotated 45 degrees.
corrplot(
  cor_matrix,
  type = "upper",
  method = "circle",
  tl.srt = 45,
  tl.col = "black"
)

# REGRESSION MODELS
# Model 1: total contribution amount regressed on the party indicator alone
# (Party_Binary is 1 for DEM-affiliated PACs, 0 otherwise).
model1 <- lm(Total ~ Party_Binary, data = pacs_data)

# Print explicitly, like the other tables in this script: a bare summary()
# call is only shown when top-level results are auto-printed, which does not
# happen under a plain source().
print(summary(model1))

## 
## Call:
## lm(formula = Total ~ Party_Binary, data = pacs_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -156799  -54935  -51739  -23739  423286 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    193299      74730   2.587   0.0165 *
## Party_Binary  -136561      81538  -1.675   0.1075  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 149500 on 23 degrees of freedom
## Multiple R-squared:  0.1087, Adjusted R-squared:  0.06995 
## F-statistic: 2.805 on 1 and 23 DF,  p-value: 0.1075

# Model 2: adds the Exclusively_Partisan flag to the party indicator.
model2 <- lm(Total ~ Party_Binary + Exclusively_Partisan, data = pacs_data)

# Print explicitly, like the other tables in this script: a bare summary()
# call is only shown when top-level results are auto-printed, which does not
# happen under a plain source().
print(summary(model2))

## 
## Call:
## lm(formula = Total ~ Party_Binary + Exclusively_Partisan, data = pacs_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -176355  -17689  -15224   14970  365532 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            355874      98098   3.628  0.00149 **
## Party_Binary          -175269      76602  -2.288  0.03211 * 
## Exclusively_Partisan  -162575      70207  -2.316  0.03029 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 137000 on 22 degrees of freedom
## Multiple R-squared:  0.2834, Adjusted R-squared:  0.2182 
## F-statistic:  4.35 on 2 and 22 DF,  p-value: 0.0256

# Variance inflation factors for the predictors of model2
# (rule of thumb used here: values > 5 indicate potential multicollinearity).
# Printed explicitly so the result is also shown when the script is source()d.
print(vif(model2))

##         Party_Binary Exclusively_Partisan 
##                 1.05                 1.05

# H1: The Democratic Party recruited more female candidates.
# Descriptive comparison only: the candidate counts are the simulated values
# defined earlier (no actual recruitment numbers are available), shown next to
# the number of PACs affiliated with each party.
party_recruitment <- data.frame(
  Party = c("Democratic", "Republican"),
  Candidates = c(dem_candidates, rep_candidates),
  PACs = vapply(
    c("DEM", "REP"),
    function(code) sum(pacs_data$Affiliate == code),
    integer(1),
    USE.NAMES = FALSE
  )
)
print(party_recruitment)

##        Party Candidates PACs
## 1 Democratic        150   21
## 2 Republican         45    4

# H2: Democratic women received more financial support.
# Side-by-side table of total dollars given to each party's candidates and the
# per-candidate average already computed in per_candidate_support.
h2_test <- data.frame(
  Party = c("Democratic", "Republican"),
  Total_Support = with(pacs_data, c(sum(To_Democrats), sum(To_Republicans))),
  Per_Candidate = per_candidate_support[["Support_Per_Candidate"]]
)
print(h2_test)

##        Party Total_Support Per_Candidate
## 1 Democratic       1180009      7866.727
## 2 Republican        773696     17193.244

# H3: Consistency of support.
# Spread of PAC totals within each party:
#   CV        - standard deviation as a percentage of the mean
#   IQR_Ratio - interquartile range divided by the median
#   PAC_Count - number of PACs in the group
consistency_metrics <- pacs_data |>
  group_by(Affiliate) |>
  summarise(
    CV        = sd(Total) / mean(Total) * 100,
    IQR_Ratio = IQR(Total) / median(Total),
    PAC_Count = n()
  )
print(consistency_metrics)

## # A tibble: 2 × 4
##   Affiliate    CV IQR_Ratio PAC_Count
##   <chr>     <dbl>     <dbl>     <int>
## 1 DEM        227.      2.98        21
## 2 REP        127.      1.77         4

# VISUALIZATIONS
# 1. Total financial support by party
# One bar per party; bar height is the summed contributions in party_summary.
# geom_col() is the direct form of geom_bar(stat = "identity").
chart_total_support <- ggplot(
  party_summary,
  aes(x = Affiliate, y = Total_Contributions, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c("DEM" = "blue", "REP" = "red")) +
  labs(
    title = "Chart 1: Total Financial Support by Party Affiliation",
    subtitle = "Sum of all contributions from Women's Issues PACs",
    x = "Party Affiliation",
    y = "Total Financial Support ($)"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_total_support)

# 2. Number of PACs by party
# One bar per party; bar height is the PAC count in party_summary.
# geom_col() is the direct form of geom_bar(stat = "identity").
chart_pac_count <- ggplot(
  party_summary,
  aes(x = Affiliate, y = Number_of_PACs, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c("DEM" = "blue", "REP" = "red")) +
  labs(
    title = "Chart 2: Number of Women's Issues PACs by Party",
    subtitle = "Count of PACs affiliated with each party",
    x = "Party Affiliation",
    y = "Number of PACs"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_pac_count)

# 3. Average contribution per PAC by party
# One bar per party; bar height is the mean PAC total in party_summary.
# geom_col() is the direct form of geom_bar(stat = "identity").
chart_mean_contribution <- ggplot(
  party_summary,
  aes(x = Affiliate, y = Mean_Contribution, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c("DEM" = "blue", "REP" = "red")) +
  labs(
    title = "Chart 3: Average Contribution per PAC by Party",
    subtitle = "Mean financial contribution of PACs by party affiliation",
    x = "Party Affiliation",
    y = "Average Contribution per PAC ($)"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_mean_contribution)

# 4. Support per candidate by party (simulated)
# Bar height is Support_Per_Candidate from per_candidate_support, which rests
# on simulated candidate counts (stated in the subtitle as well).
# geom_col() is the direct form of geom_bar(stat = "identity").
chart_support_per_candidate <- ggplot(
  per_candidate_support,
  aes(x = Party, y = Support_Per_Candidate, fill = Party)
) +
  geom_col() +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  labs(
    title = "Chart 4: Average Support per Female Candidate by Party",
    subtitle = "Based on simulated candidate counts",
    x = "Party",
    y = "Support Per Candidate ($)"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_support_per_candidate)

# 5. Distribution of PAC sizes by party
# Stacked bars scaled to proportions (position = "fill"), one bar per party.
# PAC_Size is an ordered category, so it is mapped as a factor with levels
# Small < Medium < Large; as a plain character column it would be laid out
# alphabetically (Large, Medium, Small) and the sequential "Blues" palette
# would not follow the size order. The legend title is kept as "PAC_Size".
chart_pac_sizes <- ggplot(
  pacs_data,
  aes(
    x = Affiliate,
    fill = factor(PAC_Size, levels = c("Small", "Medium", "Large"))
  )
) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "Chart 5: Distribution of PAC Sizes by Party",
    x = "Party Affiliation",
    y = "Proportion",
    fill = "PAC_Size"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_pac_sizes)

# 6. Distribution of contribution amounts
# Histogram of PAC totals in 10 bins, with the two parties' bars placed side
# by side within each bin.
chart_contribution_histogram <- ggplot(
  pacs_data,
  aes(x = Total, fill = Affiliate)
) +
  geom_histogram(bins = 10, position = "dodge") +
  scale_fill_manual(values = c("DEM" = "blue", "REP" = "red")) +
  labs(
    title = "Chart 6: Distribution of PAC Contribution Amounts",
    x = "Contribution Amount ($)",
    y = "Count"
  ) +
  theme_minimal()

# Draw explicitly: a bare ggplot object is only rendered when top-level
# results are auto-printed, which does not happen under a plain source().
print(chart_contribution_histogram)

# Export the processed PAC table and the two summary tables as CSV files
# (written relative to the current working directory, without row names).
write.csv(
  x = pacs_data,
  file = "Processed_PACs_Data.csv",
  row.names = FALSE
)
write.csv(
  x = party_summary,
  file = "Party_Summary_Statistics.csv",
  row.names = FALSE
)
write.csv(
  x = per_candidate_support,
  file = "Per_Candidate_Support.csv",
  row.names = FALSE
)

# women-candidates-analysis.r

# dzc1595

# 2025-04-22