# R CODE FOR WOMEN'S ISSUES PACS ANALYSIS
# Load required libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car) # For VIF calculations
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(broom) # For tidy model outputs
## Warning: package 'broom' was built under R version 4.4.3
library(corrplot) # For correlation plots
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
# Build the raw PAC table by hand (values transcribed from the data shared
# earlier). Dollar amounts stay as formatted strings at this point; they are
# converted to numbers by the cleaning step that follows.
pacs_data <- local({
  pac_names <- c(
    "Value in Electing Women PAC",
    "EMILY's List",
    "Tri-State Maxed Out Women",
    "Maggie's List",
    "Winning for Women",
    "Women for American Values & Ethics",
    "Electing Women Bay Area",
    "Vote Mama",
    "RightNow Women",
    "National Women's Political Caucus",
    "Los Angeles Women's Giving Collective",
    "WomenWinning",
    "Electing Women PAC",
    "Moms in Office",
    "Her Bold Move Action",
    "Progressive Women's Alliance of Western Michigan",
    "Santa Barbara Women's PAC",
    "Women United for Florida Families",
    "Electing Women Greater Seattle PAC",
    "Run Sister Run PAC",
    "Matriarch PAC",
    "Alaska March On",
    "Ohio Women's Caucus",
    "Persistence PAC",
    "Womencount PAC"
  )

  # Rows 1, 4, 5 and 9 are the Republican-affiliated PACs; every other row is
  # Democratic. Affiliate and Lean are both derived from this one mask.
  is_republican <- seq_along(pac_names) %in% c(1, 4, 5, 9)

  total_given <- c(
    "$558,831", "$480,025", "$397,750", "$106,825", "$71,040",
    "$59,944", "$57,302", "$45,540", "$36,500", "$33,000",
    "$30,300", "$23,500", "$17,942", "$10,750", "$10,250",
    "$5,000", "$4,250", "$4,005", "$2,806", "$2,500",
    "$2,000", "$1,804", "$1,500", "$1,000", "$341"
  )
  given_to_dems <- c(
    "$0", "$475,025", "$392,750", "$0", "$0",
    "$59,944", "$57,302", "$45,540", "$0", "$33,000",
    "$30,300", "$23,500", "$17,942", "$10,250", "$9,750",
    "$5,000", "$3,750", "$4,005", "$2,806", "$2,500",
    "$2,000", "$1,804", "$1,500", "$1,000", "$341"
  )
  given_to_reps <- c(
    "$558,831", "$0", "$0", "$106,825", "$71,040",
    "$0", "$0", "$0", "$36,500", "$0",
    "$0", "$0", "$0", "$0", "$0",
    "$0", "$500", "$0", "$0", "$0",
    "$0", "$0", "$0", "$0", "$0"
  )

  data.frame(
    PAC_Name = pac_names,
    Affiliate = ifelse(is_republican, "REP", "DEM"),
    Total = total_given,
    To_Democrats = given_to_dems,
    To_Republicans = given_to_reps,
    Lean = ifelse(
      is_republican,
      "Solidly Republican/Conservative",
      "Solidly Democrat/Liberal"
    )
  )
})
# Clean and transform monetary values
# The three dollar columns arrive as strings such as "$558,831"; drop the
# "$" and "," characters and convert each column to numeric in a single pass.
pacs_data <- pacs_data %>%
  mutate(
    across(
      c(Total, To_Democrats, To_Republicans),
      \(amount) as.numeric(gsub("[$,]", "", amount))
    )
  )
# Create additional variables for analysis
# Adds party, allocation-share, size and partisanship indicators to pacs_data.
pacs_data <- pacs_data %>%
  mutate(
    # Party affiliation as a 0/1 indicator: 1 = "DEM", 0 = anything else
    Party_Binary = ifelse(Affiliate == "DEM", 1, 0),
    # Share of each PAC's total going to each party, in percent.
    # These are not finite when Total is 0.
    Pct_To_Democrats = To_Democrats / Total * 100,
    Pct_To_Republicans = To_Republicans / Total * 100,
    # PAC size bucket by total dollars given. A missing Total is kept as NA
    # rather than falling through to the catch-all "Large" branch.
    PAC_Size = case_when(
      is.na(Total) ~ NA_character_,
      Total < 10000 ~ "Small",
      Total < 100000 ~ "Medium",
      TRUE ~ "Large"
    ),
    # 1 when all of the PAC's total went to a single party. The percentages
    # are computed doubles, so compare with a tolerance instead of `==`.
    Exclusively_Partisan = ifelse(
      near(Pct_To_Democrats, 100) | near(Pct_To_Republicans, 100),
      1,
      0
    ),
    # 1 when the PAC gave a positive amount to both parties
    Crossover_Support = ifelse(To_Democrats > 0 & To_Republicans > 0, 1, 0)
  )
# DESCRIPTIVE STATISTICS BY PARTY
# One row per Affiliate value: PAC count plus sum, mean, median, max, min and
# standard deviation of Total (missing totals are ignored in each statistic).
party_summary <- summarise(
  group_by(pacs_data, Affiliate),
  Number_of_PACs = n(),
  Total_Contributions = sum(Total, na.rm = TRUE),
  Mean_Contribution = mean(Total, na.rm = TRUE),
  Median_Contribution = median(Total, na.rm = TRUE),
  Max_Contribution = max(Total, na.rm = TRUE),
  Min_Contribution = min(Total, na.rm = TRUE),
  SD_Contribution = sd(Total, na.rm = TRUE)
)
print(party_summary)
## # A tibble: 2 × 8
## Affiliate Number_of_PACs Total_Contributions Mean_Contribution
## <chr> <int> <dbl> <dbl>
## 1 DEM 21 1191509 56739.
## 2 REP 4 773196 193299
## # ℹ 4 more variables: Median_Contribution <dbl>, Max_Contribution <dbl>,
## # Min_Contribution <dbl>, SD_Contribution <dbl>
# Calculate per-candidate support metrics
# Note: the candidate counts below are simulated placeholders, not observed
# data, so the per-candidate figures are illustrative only.
dem_candidates <- 150 # Simulated number of Democratic women candidates
rep_candidates <- 45 # Simulated number of Republican women candidates

# Dollars given to each party across all PACs, next to the candidate counts
per_candidate_support <- data.frame(
  Party = c("Democratic", "Republican"),
  Total_Support = with(pacs_data, c(sum(To_Democrats), sum(To_Republicans))),
  Candidate_Count = c(dem_candidates, rep_candidates)
)

# Average dollars per (simulated) candidate
per_candidate_support <- transform(
  per_candidate_support,
  Support_Per_Candidate = Total_Support / Candidate_Count
)
print(per_candidate_support)
## Party Total_Support Candidate_Count Support_Per_Candidate
## 1 Democratic 1180009 150 7866.727
## 2 Republican 773696 45 17193.244
# MULTICOLLINEARITY ASSESSMENT
# Keep only the numeric columns that enter the correlation analysis
model_data <- pacs_data[
  ,
  c(
    "Total", "To_Democrats", "To_Republicans",
    "Party_Binary", "Exclusively_Partisan"
  )
]

# Pairwise correlations, computed on rows with no missing values
cor_matrix <- cor(x = model_data, use = "complete.obs")
print(cor_matrix)
## Total To_Democrats To_Republicans Party_Binary
## Total 1.0000000 0.6895891 0.6410149 -0.3296974
## To_Democrats 0.6895891 1.0000000 -0.1138047 0.1776076
## To_Republicans 0.6410149 -0.1138047 1.0000000 -0.6403047
## Party_Binary -0.3296974 0.1776076 -0.6403047 1.0000000
## Exclusively_Partisan -0.3359175 -0.5651765 0.1393817 -0.2182179
## Exclusively_Partisan
## Total -0.3359175
## To_Democrats -0.5651765
## To_Republicans 0.1393817
## Party_Binary -0.2182179
## Exclusively_Partisan 1.0000000
# Visualize correlation matrix
# Upper triangle only, drawn as circles, with black labels rotated 45 degrees.
corrplot(
  cor_matrix,
  type = "upper",
  method = "circle",
  tl.srt = 45,
  tl.col = "black"
)

# REGRESSION MODELS
# Baseline model: each PAC's total contribution amount regressed on the
# party indicator alone.
model1 <- lm(
  formula = Total ~ Party_Binary,
  data = pacs_data
)
summary(model1)
##
## Call:
## lm(formula = Total ~ Party_Binary, data = pacs_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -156799 -54935 -51739 -23739 423286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 193299 74730 2.587 0.0165 *
## Party_Binary -136561 81538 -1.675 0.1075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 149500 on 23 degrees of freedom
## Multiple R-squared: 0.1087, Adjusted R-squared: 0.06995
## F-statistic: 2.805 on 1 and 23 DF, p-value: 0.1075
# Extended model: adds the exclusively-partisan indicator as a second
# predictor alongside the party indicator.
model2 <- lm(
  formula = Total ~ Party_Binary + Exclusively_Partisan,
  data = pacs_data
)
summary(model2)
##
## Call:
## lm(formula = Total ~ Party_Binary + Exclusively_Partisan, data = pacs_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -176355 -17689 -15224 14970 365532
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 355874 98098 3.628 0.00149 **
## Party_Binary -175269 76602 -2.288 0.03211 *
## Exclusively_Partisan -162575 70207 -2.316 0.03029 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 137000 on 22 degrees of freedom
## Multiple R-squared: 0.2834, Adjusted R-squared: 0.2182
## F-statistic: 4.35 on 2 and 22 DF, p-value: 0.0256
# Variance inflation factors for the two-predictor model
# (values > 5 indicate potential multicollinearity)
car::vif(model2)
## Party_Binary Exclusively_Partisan
## 1.05 1.05
# Testing H1: The Democratic Party recruited more female candidates
# This only tabulates the simulated candidate counts next to the number of
# PACs per affiliation; no actual recruitment numbers are available.
party_recruitment <- data.frame(
  Party = c("Democratic", "Republican"),
  Candidates = c(dem_candidates, rep_candidates),
  PACs = vapply(
    c("DEM", "REP"),
    function(code) sum(pacs_data$Affiliate == code),
    integer(1),
    USE.NAMES = FALSE
  )
)
print(party_recruitment)
## Party Candidates PACs
## 1 Democratic 150 21
## 2 Republican 45 4
# Testing H2: Democratic women received more financial support
# Side-by-side view of total dollars per party and the per-candidate average
# already computed in per_candidate_support.
h2_test <- data.frame(
  Party = c("Democratic", "Republican"),
  Total_Support = c(
    sum(pacs_data[["To_Democrats"]]),
    sum(pacs_data[["To_Republicans"]])
  ),
  Per_Candidate = per_candidate_support[["Support_Per_Candidate"]]
)
print(h2_test)
## Party Total_Support Per_Candidate
## 1 Democratic 1180009 7866.727
## 2 Republican 773696 17193.244
# Testing H3: Consistency of support
# Dispersion of PAC totals within each affiliation:
#   CV        - coefficient of variation, in percent (sd / mean * 100)
#   IQR_Ratio - interquartile range divided by the median
#   PAC_Count - number of PACs in the group
consistency_metrics <- summarise(
  group_by(pacs_data, Affiliate),
  CV = sd(Total) / mean(Total) * 100,
  IQR_Ratio = IQR(Total) / median(Total),
  PAC_Count = n()
)
print(consistency_metrics)
## # A tibble: 2 × 4
## Affiliate CV IQR_Ratio PAC_Count
## <chr> <dbl> <dbl> <int>
## 1 DEM 227. 2.98 21
## 2 REP 127. 1.77 4
# VISUALIZATIONS
# 1. Total financial support by party
# One bar per affiliation; bar height is the summed contributions taken
# directly from party_summary.
ggplot(
  data = party_summary,
  mapping = aes(x = Affiliate, y = Total_Contributions, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c(DEM = "blue", REP = "red")) +
  labs(
    x = "Party Affiliation",
    y = "Total Financial Support ($)",
    title = "Chart 1: Total Financial Support by Party Affiliation",
    subtitle = "Sum of all contributions from Women's Issues PACs"
  ) +
  theme_minimal()

# 2. Number of PACs by party
# One bar per affiliation; bar height is the PAC count from party_summary.
ggplot(
  data = party_summary,
  mapping = aes(x = Affiliate, y = Number_of_PACs, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c(DEM = "blue", REP = "red")) +
  labs(
    x = "Party Affiliation",
    y = "Number of PACs",
    title = "Chart 2: Number of Women's Issues PACs by Party",
    subtitle = "Count of PACs affiliated with each party"
  ) +
  theme_minimal()

# 3. Average contribution per PAC by party
# One bar per affiliation; bar height is the mean PAC total from
# party_summary.
ggplot(
  data = party_summary,
  mapping = aes(x = Affiliate, y = Mean_Contribution, fill = Affiliate)
) +
  geom_col() +
  scale_fill_manual(values = c(DEM = "blue", REP = "red")) +
  labs(
    x = "Party Affiliation",
    y = "Average Contribution per PAC ($)",
    title = "Chart 3: Average Contribution per PAC by Party",
    subtitle = "Mean financial contribution of PACs by party affiliation"
  ) +
  theme_minimal()

# 4. Support per candidate by party (simulated)
# Bar height is dollars per candidate from per_candidate_support, which is
# based on simulated candidate counts.
ggplot(
  data = per_candidate_support,
  mapping = aes(x = Party, y = Support_Per_Candidate, fill = Party)
) +
  geom_col() +
  scale_fill_manual(values = c(Democratic = "blue", Republican = "red")) +
  labs(
    x = "Party",
    y = "Support Per Candidate ($)",
    title = "Chart 4: Average Support per Female Candidate by Party",
    subtitle = "Based on simulated candidate counts"
  ) +
  theme_minimal()

# 5. Distribution of PAC sizes by party
# Stacked bars rescaled to proportions, one bar per affiliation.
# PAC_Size is stored as plain text, so without explicit levels the fill
# categories sort alphabetically (Large, Medium, Small) and the sequential
# "Blues" palette runs against the size order. Fixing the level order makes
# the legend read Small -> Medium -> Large with progressively darker fills.
ggplot(
  pacs_data,
  aes(
    x = Affiliate,
    fill = factor(PAC_Size, levels = c("Small", "Medium", "Large"))
  )
) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "Chart 5: Distribution of PAC Sizes by Party",
    x = "Party Affiliation",
    y = "Proportion",
    # Keep the legend title that the bare column mapping used to produce
    fill = "PAC_Size"
  ) +
  theme_minimal()

# 6. Distribution of contribution amounts
# Histogram of PAC totals in 10 bins, with the two affiliations drawn side
# by side within each bin.
ggplot(data = pacs_data, mapping = aes(x = Total, fill = Affiliate)) +
  geom_histogram(position = "dodge", bins = 10) +
  scale_fill_manual(values = c(DEM = "blue", REP = "red")) +
  labs(
    x = "Contribution Amount ($)",
    y = "Count",
    title = "Chart 6: Distribution of PAC Contribution Amounts"
  ) +
  theme_minimal()

# Save results for future reference
# Writes the processed PAC table and the two summary tables as CSV files
# (without row names) to the paths given, relative to the working directory.
invisible(Map(
  function(tbl, path) write.csv(tbl, path, row.names = FALSE),
  list(pacs_data, party_summary, per_candidate_support),
  c(
    "Processed_PACs_Data.csv",
    "Party_Summary_Statistics.csv",
    "Per_Candidate_Support.csv"
  )
))