Women Talking: Bringing the Environment into UK Parliamentary Debates Analysis

Descriptive Statistics

## Total number of speeches (rows of Data)
nrow(Data) ## 670,856
## [1] 670856
## Unique speakers overall, then split by gender
speakers <- unique(Data$speakername)
length(speakers)
## [1] 1093
# Distinct speaker names among rows where gender is "F"
count_gen_f <- Data %>%
  filter(gender == "F") %>%
  pull(speakername) %>%
  n_distinct()
# Distinct speaker names among rows where gender is "M"
count_gen_m <- Data %>%
  filter(gender == "M") %>%
  pull(speakername) %>%
  n_distinct()

print(paste("Women Speakers", count_gen_f))
## [1] "Women Speakers 333"
print(paste("Men Speakers", count_gen_m))
## [1] "Men Speakers 760"
## Men vs women: number of speeches given

# Tag every row of Data with its row position
Data <- Data %>%
  mutate(observation_number = row_number())

# observation_number is unique per row, so counting the filtered rows directly
# gives the same result as counting distinct observation numbers
count_gen_f2 <- Data %>%
  filter(gender == "F") %>%
  nrow()
count_gen_m2 <- Data %>%
  filter(gender == "M") %>%
  nrow()
# These are counts of speeches (rows), not of speakers, and are labelled as such
print(paste("Women's Speeches:", count_gen_f2))
## [1] "Women's Speeches: 168265"
print(paste("Men's Speeches:", count_gen_m2))
## [1] "Men's Speeches: 502591"
## Rows of Data with EnvDummy == 1 vs. EnvDummy == 0
count_ones <- with(Data, sum(EnvDummy == 1))
count_zeros <- with(Data, sum(EnvDummy == 0))
print(count_ones)
## [1] 24425
print(count_zeros)
## [1] 646431
## Mean of YearsExp over all rows of Data
mean(Data[["YearsExp"]])
## [1] 11.102
## Rows of Data with DebateTopic == 1 vs. DebateTopic == 0
count_ones2 <- with(Data, sum(DebateTopic == 1))
count_zeros2 <- with(Data, sum(DebateTopic == 0))
print(count_ones2)
## [1] 9815
print(count_zeros2)
## [1] 661041
## Env Committee membership: distinct speakers by EnvCommittee flag
count_comittee_0 <- Data %>%
  filter(EnvCommittee == 0) %>%
  pull(speakername) %>%
  n_distinct()
count_committee_1 <- Data %>%
  filter(EnvCommittee == 1) %>%
  pull(speakername) %>%
  n_distinct()
print(paste("Number of non-Env Committee members:", count_comittee_0))
## [1] "Number of non-Env Committee members: 924"
print(paste("Number Env Committee members:", count_committee_1))
## [1] "Number Env Committee members: 169"
## Gender of Env Committee members: distinct speakers with EnvCommittee == 1,
## split by gender
count_comitteegen_M <- Data %>%
  filter(EnvCommittee == 1, gender == "M") %>%
  pull(speakername) %>%
  n_distinct()

count_comitteegen_F <- Data %>%
  filter(EnvCommittee == 1, gender == "F") %>%
  pull(speakername) %>%
  n_distinct()
print(paste("Number of Men Env Committee members:", count_comitteegen_M))
## [1] "Number of Men Env Committee members: 120"
print(paste("Number of Women Env Committee members:", count_comitteegen_F))
## [1] "Number of Women Env Committee members: 49"
## Ruling Party membership: distinct speakers by RulingParty flag
# A speaker who appears under both values of RulingParty is counted in both
count_conserv_0 <- Data %>%
  filter(RulingParty == 0) %>%
  pull(speakername) %>%
  n_distinct()
count_conserv_1 <- Data %>%
  filter(RulingParty == 1) %>%
  pull(speakername) %>%
  n_distinct()
print(paste("Number of Non-Conservative:", count_conserv_0))
## [1] "Number of Non-Conservative: 613"
print(paste("Number of Conservatives:", count_conserv_1))
## [1] "Number of Conservatives: 516"
# Line plot with points: GenderGap by Year, with a zero reference line and a
# dashed linear trend

## Gender Gap
ggplot(GenderGap, aes(x = Year, y = GenderGap)) +
  geom_line() +
  geom_point() +
  # A single x scale carries both limits and breaks. A separate xlim() call is
  # replaced by a later scale_x_continuous(), which silently drops its limits.
  scale_x_continuous(
    limits = c(2010, 2021),
    breaks = seq(min(GenderGap$Year), max(GenderGap$Year), by = 1)
  ) +
  ylim(-1, 3) +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0)
  geom_hline(
    yintercept = 0, color = "black", linetype = "solid", linewidth = 0.9
  ) +
  geom_smooth(
    method = "lm", se = FALSE, color = "grey", linetype = "dashed",
    linewidth = 0.75
  ) +
  labs(
    title = "Gender Gap in Speech Environmentalism Over Time",
    x = "Year",
    y = "Gender Gap (%)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

## Men vs women: WomEnvSpeech and MenEnvSpeech by Year
ggplot(GenderGap, aes(x = Year)) +
  geom_line(aes(y = WomEnvSpeech, color = "Women's Env Speech")) +
  geom_point(aes(y = WomEnvSpeech, color = "Women's Env Speech")) +
  geom_line(aes(y = MenEnvSpeech, color = "Men's Env Speech")) +
  geom_point(aes(y = MenEnvSpeech, color = "Men's Env Speech")) +
  # A single x scale carries both limits and breaks. A separate xlim() call is
  # replaced by a later scale_x_continuous(), which silently drops its limits.
  scale_x_continuous(limits = c(2010, 2021), breaks = seq(2010, 2021, by = 1)) +
  ylim(0, 9) +
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0)
  geom_hline(
    yintercept = 0, color = "black", linetype = "solid", linewidth = 0.5
  ) +
  # Map the two series keys to colours and to the legend labels shown
  scale_color_manual(
    values = c("Men's Env Speech" = "blue", "Women's Env Speech" = "red"),
    labels = c("Men's Env Speech" = "Men MPs", "Women's Env Speech" = "Women MPs")
  ) +
  labs(
    title = "Proportion of Women and Men's Environmental Speeches: 2010-2021",
    x = "Year",
    y = "% Env. Speeches of Total Speeches",
    color = "Legend"
  ) +
  theme_minimal()

## By Gender
# Sum of EnvDummy per year and gender; 2021 and rows with missing gender are
# excluded. `.groups = "drop"` returns an ungrouped result and avoids the
# "summarise() has grouped output" message.
yearly_env_sum <- Data %>%
  filter(year != 2021, !is.na(gender)) %>%
  group_by(year, gender) %>%
  summarise(total_envdummy = sum(EnvDummy), .groups = "drop")

# Sum of EnvDummy per year across all rows, labelled "Total" so it can be
# plotted alongside the per-gender series.
# NOTE(review): rows with missing gender are not filtered out here, so the
# total can exceed F + M if such rows exist; confirm this is intended.
total_env_sum <- Data %>%
  filter(year != 2021) %>%
  group_by(year) %>%
  summarise(total_envdummy = sum(EnvDummy)) %>%
  mutate(gender = "Total")

# Combine gender-specific data with total data
combined_data <- bind_rows(yearly_env_sum, total_env_sum)

# Ensure 'year' is treated as an integer
combined_data$year <- as.integer(combined_data$year)

# One line per gender plus the total; breaks are the distinct years so only
# whole years appear on the x-axis
ggplot(combined_data, aes(x = year, y = total_envdummy, color = gender)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = sort(unique(combined_data$year))) +
  scale_color_manual(
    values = c("F" = "red", "M" = "blue", "Total" = "black"),
    labels = c("F" = "Female", "M" = "Male", "Total" = "Total")
  ) +
  labs(
    title = "Total Environmental Speeches per Year: 2010-2020",
    x = "Year",
    y = "Number of Environmental Speeches"
  ) +
  theme_minimal()

Regressions

Testing for NBR or Poisson

## Testing if NBR is correct: check the Poisson model for overdispersion

# Fit a Poisson regression model with "M" as the reference level of gender
AggregatedData$gender <- relevel(as.factor(AggregatedData$gender), ref = "M")
poisson_model <- glm(
  EnvDummy ~ gender + YearsExp + gvt_role + opp_role + parly_role +
    EnvCommittee + RulingParty + DebateTopic,
  data = AggregatedData,
  family = "poisson"
)

# Pearson chi-squared statistic: the sum of squared Pearson residuals.
# The variable keeps its original name, but it is not the residual deviance.
residual_deviance <- sum(resid(poisson_model, type = "pearson")^2)

# Dispersion estimate: Pearson chi-squared over the residual degrees of freedom
df <- df.residual(poisson_model)
dispersion <- residual_deviance / df

# One-sided chi-squared test of the Pearson statistic against its residual
# degrees of freedom, so "overdispersed" means significantly greater than 1
# rather than merely above 1
dispersion_p_value <- pchisq(residual_deviance, df, lower.tail = FALSE)

if (dispersion > 1 && dispersion_p_value < 0.05) {
  print("Data is overdispersed.")
} else {
  print("Data is not overdispersed.")
}
## [1] "Data is overdispersed."

Regressions:

Regressions: no controls. Printed with default and clustered standard errors.

## Cluster-robust (CR0) variance matrix for LM1, clustered on speakername
cluster_vcov1 <- vcovCR(
  LM1,
  cluster = AggregatedData$speakername,
  type = "CR0"
)

## Print the same model twice: default vs. clustered standard errors,
## with exponentiated coefficients
modelsummary(
  list(
    "Default Standard Errors" = LM1,
    "Clustered Standard Errors" = LM1
  ),
  # NULL keeps the default vcov for the first column
  vcov = list(NULL, cluster_vcov1),
  statistic = "std.error",
  stars = TRUE,
  exponentiate = TRUE,
  conf_level = 0.95
)
Default Standard Errors Clustered Standard Errors
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 2.920*** 2.920***
(0.063) (0.165)
genderF 1.210*** 1.210
(0.049) (0.143)
Num.Obs. 7902 7902
AIC 34168.9 34168.9
BIC 34189.8 34189.8
Log.Lik. -17081.431 -17081.431
F 22.145
RMSE 7.56 7.56
Std.Errors Custom
## Chi-squared goodness-of-fit test on the model deviance: p > 0.05 = good fit
p_value <- pchisq(LM1$deviance, LM1$df.residual, lower.tail = FALSE)
p_value
## [1] 0.1366918

Regressions: with controls. Printed with default and clustered standard errors

## With controls: negative binomial model of EnvDummy, "M" as reference gender
AggregatedData$gender <- relevel(AggregatedData$gender, ref = "M")
LM2 <- glm.nb(
  EnvDummy ~ gender + YearsExp + gvt_role + opp_role + EnvCommittee +
    RulingParty + DebateTopic,
  data = AggregatedData,
  # Allow up to 100 fitting iterations
  control = glm.control(maxit = 100)
)
summary(LM2)
## 
## Call:
## glm.nb(formula = EnvDummy ~ gender + YearsExp + gvt_role + opp_role + 
##     EnvCommittee + RulingParty + DebateTopic, data = AggregatedData, 
##     control = glm.control(maxit = 100), init.theta = 0.5534260782, 
##     link = log)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.521701   0.046682  11.176  < 2e-16 ***
## genderF       0.150494   0.038972   3.862 0.000113 ***
## YearsExp     -0.002473   0.001933  -1.279 0.200918    
## gvt_role      0.415893   0.045963   9.048  < 2e-16 ***
## opp_role      0.154089   0.048246   3.194 0.001404 ** 
## EnvCommittee  0.654666   0.043386  15.089  < 2e-16 ***
## RulingParty   0.132086   0.044370   2.977 0.002912 ** 
## DebateTopic   0.086891   0.002025  42.905  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.5534) family taken to be 1)
## 
##     Null deviance: 9522.9  on 7901  degrees of freedom
## Residual deviance: 8076.9  on 7894  degrees of freedom
## AIC: 32911
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.5534 
##           Std. Err.:  0.0121 
## 
##  2 x log-likelihood:  -32892.5530
## Cluster-robust (CR0) variance matrix for LM2, clustered on speakername

cluster_vcov2 <- vcovCR(
  LM2,
  cluster = AggregatedData$speakername,
  type = "CR0"
)

## Print the same model twice: default vs. clustered standard errors,
## with exponentiated coefficients
modelsummary(
  list(
    "Default Standard Errors" = LM2,
    "Clustered Standard Errors" = LM2
  ),
  # NULL keeps the default vcov for the first column
  vcov = list(NULL, cluster_vcov2),
  statistic = "std.error",
  stars = TRUE,
  exponentiate = TRUE,
  conf_level = 0.95
)
Default Standard Errors Clustered Standard Errors
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 1.685*** 1.685***
(0.079) (0.163)
genderF 1.162*** 1.162
(0.045) (0.114)
YearsExp 0.998 0.998
(0.002) (0.004)
gvt_role 1.516*** 1.516***
(0.070) (0.163)
opp_role 1.167** 1.167+
(0.056) (0.109)
EnvCommittee 1.924*** 1.924***
(0.083) (0.185)
RulingParty 1.141** 1.141
(0.051) (0.121)
DebateTopic 1.091*** 1.091***
(0.002) (0.003)
Num.Obs. 7902 7902
AIC 32910.6 32910.6
BIC 32973.3 32973.3
Log.Lik. -16446.276 -16446.276
F 333.554
RMSE 5277328.31 5277328.31
Std.Errors Custom
## Chi-squared goodness-of-fit test on the model deviance: p > 0.05 = good fit
p_value <- pchisq(LM2$deviance, LM2$df.residual, lower.tail = FALSE)
p_value # 0.074 = bigger than 0.05 = good.
## [1] 0.07353772
## Variance inflation factors: none is over 10, so no multicollinearity
vif(LM2)
##       gender     YearsExp     gvt_role     opp_role EnvCommittee  RulingParty 
##     1.095836     1.095033     1.202180     1.583321     1.006176     1.745700 
##  DebateTopic 
##     1.008056

Looking at difference between during and not during environmental debates

Outside environmental debates: exponentiated results with clustered standard errors and goodness-of-fit test

## Cluster-robust (CR0) variance matrix for DebateModel1, clustered on
## speakername

cluster_vcov3 <- vcovCR(
  DebateModel1,
  cluster = EnvDebateModelData$speakername,
  type = "CR0"
)

# Same model twice: default vs. clustered standard errors, with exponentiated
# coefficients
modelsummary(
  list(
    "Default Standard Errors" = DebateModel1,
    "Clustered Standard Errors" = DebateModel1
  ),
  # NULL keeps the default vcov for the first column
  vcov = list(NULL, cluster_vcov3),
  statistic = "std.error",
  stars = TRUE,
  exponentiate = TRUE,
  conf_level = 0.95
)
Default Standard Errors Clustered Standard Errors
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 1.568*** 1.568***
(0.083) (0.171)
genderF 1.243*** 1.243+
(0.055) (0.140)
YearsExp 0.999 0.999
(0.002) (0.005)
gvt_role 1.636*** 1.636***
(0.085) (0.191)
opp_role 1.144* 1.144
(0.062) (0.128)
EnvCommittee 2.051*** 2.051***
(0.101) (0.234)
RulingParty 1.162** 1.162
(0.058) (0.143)
Num.Obs. 7248 7248
AIC 28350.0 28350.0
BIC 28405.1 28405.1
Log.Lik. -14166.999 -14166.999
F 59.983
RMSE 5.33 5.33
Std.Errors Custom
## Chi-squared goodness-of-fit test on the model deviance: p > 0.05 = good fit
p_value <- pchisq(
  DebateModel1$deviance,
  DebateModel1$df.residual,
  lower.tail = FALSE
)
p_value
## [1] 0.780214

During environmental debates: exponentiated results with clustered standard errors and goodness-of-fit test

# Negative binomial model with DebateTopic as the outcome, fitted on
# EnvDebateModelData
DebateModel2 <- glm.nb(
  DebateTopic ~ gender + YearsExp + gvt_role + opp_role + EnvCommittee +
    RulingParty,
  data = EnvDebateModelData
)
summary(DebateModel2)
## 
## Call:
## glm.nb(formula = DebateTopic ~ gender + YearsExp + gvt_role + 
##     opp_role + EnvCommittee + RulingParty, data = EnvDebateModelData, 
##     init.theta = 0.1226613002, link = log)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.555138   0.102757  -5.402 6.57e-08 ***
## genderF       0.048738   0.086661   0.562  0.57385    
## YearsExp     -0.013769   0.004398  -3.131  0.00174 ** 
## gvt_role      0.565258   0.103327   5.471 4.49e-08 ***
## opp_role     -0.036392   0.105680  -0.344  0.73057    
## EnvCommittee  0.876661   0.095730   9.158  < 2e-16 ***
## RulingParty  -0.469679   0.098755  -4.756 1.97e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1227) family taken to be 1)
## 
##     Null deviance: 3430.1  on 7247  degrees of freedom
## Residual deviance: 3279.7  on 7241  degrees of freedom
## AIC: 11775
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.12266 
##           Std. Err.:  0.00486 
## 
##  2 x log-likelihood:  -11758.55000
## Cluster-robust variance matrix for DebateModel2, clustered on speakername
# NOTE(review): this call uses type = "CR2", whereas the other clustered
# variance matrices in this file use "CR0" - confirm the difference is
# deliberate, and regenerate the table below if it is changed.
cluster_vcov4 <- vcovCR(
  DebateModel2,
  cluster = EnvDebateModelData$speakername,
  type = "CR2"
)

# Same model twice: default vs. clustered standard errors, with exponentiated
# coefficients
modelsummary(
  list(
    "Default Standard Errors" = DebateModel2,
    "Clustered Standard Errors" = DebateModel2
  ),
  # NULL keeps the default vcov for the first column
  vcov = list(NULL, cluster_vcov4),
  statistic = "std.error",
  stars = TRUE,
  exponentiate = TRUE,
  conf_level = 0.95
)
Default Standard Errors Clustered Standard Errors
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.574*** 0.574*
(0.059) (0.143)
genderF 1.050 1.050
(0.091) (0.194)
YearsExp 0.986** 0.986
(0.004) (0.008)
gvt_role 1.760*** 1.760+
(0.182) (0.560)
opp_role 0.964 0.964
(0.102) (0.195)
EnvCommittee 2.403*** 2.403***
(0.230) (0.365)
RulingParty 0.625*** 0.625
(0.062) (0.189)
Num.Obs. 7248 7248
AIC 11774.5 11774.5
BIC 11829.7 11829.7
Log.Lik. -5879.275 -5879.275
F 22.691
RMSE 3.12 3.12
Std.Errors Custom
## Chi-squared goodness-of-fit test on the model deviance: p > 0.05 = good fit
p_value <- pchisq(
  DebateModel2$deviance,
  DebateModel2$df.residual,
  lower.tail = FALSE
)
p_value
## [1] 1

Plotting: total environmental speeches per party (figure 4)

# Read the per-party data and fix the order of the bars on the x-axis
Party <- read.csv("/Users/hannahsalamon/Desktop/Ongoing Research/Speeches/2010 Analysis/PartyProportions.csv")
# NOTE(review): the "Independent " level has a trailing space - presumably it
# matches the value stored in the CSV; verify. Values of `party` not listed
# here become NA.
Party$party <- factor(
  Party$party,
  levels = c(
    "Conservative", "Labour", "Liberal Democrat", "Independent ",
    "Scottish National Party", "Labour (Co-op)", "Democratic Unionist Party"
  )
)
# Bar heights are taken directly from TotalEnvSpeeches
ggplot(Party, aes(x = party, y = TotalEnvSpeeches)) +
  geom_col(position = position_dodge(width = 0.9), width = 0.7) +
  labs(
    title = "Total Environmental Speeches per Party",
    x = "Party",
    y = "Environmental Speeches"
  ) +
  theme_minimal() +
  # Centre the plot title
  theme(plot.title = element_text(hjust = 0.5))

Party Specific Regressions

Party-subsetted Regressions with clustered standard errors

# One column per party-subset model, each paired (by position) with its own
# clustered variance matrix; coefficients are exponentiated
modelsummary(
  list(
    "Conservative" = Cons1,
    "Labour" = Labour1,
    "Lib Dem" = LibDem1,
    "SNP" = SNP1,
    "Lab (Co-op)" = LabCoOp1,
    "Independent" = Independent1
  ),
  vcov = list(
    cluster_vcov_cons,
    cluster_vcov_lab,
    cluster_vcov_libdem,
    cluster_vcov_SNP,
    cluster_vcov_Labcoop,
    cluster_vcov_ind
  ),
  statistic = "std.error",
  stars = TRUE,
  exponentiate = TRUE,
  conf_level = 0.95
)
Conservative Labour Lib Dem SNP Lab (Co-op) Independent
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 1.810*** 1.294* 3.040*** 2.300*** 2.649** 0.965
(0.143) (0.165) (0.631) (0.407) (0.988) (0.433)
genderF 1.392* 1.078 1.210 0.771 0.767 0.598
(0.225) (0.116) (0.356) (0.163) (0.189) (0.266)
YearsExp 1.008 0.994 0.989 0.991 0.993 0.987
(0.005) (0.007) (0.011) (0.015) (0.017) (0.019)
gvt_role 1.431** 1.354 1.311
(0.167) (0.347) (0.716)
EnvCommittee 1.656*** 2.107*** 1.671* 1.396 1.289 1.651
(0.225) (0.267) (0.405) (0.352) (0.421) (1.156)
DebateTopic 1.059*** 1.206*** 1.026*** 1.207*** 1.164*** 1.463**
(0.003) (0.008) (0.002) (0.020) (0.012) (0.170)
opp_role 1.322** 0.857 1.189 1.037 1.356
(0.139) (0.211) (0.245) (0.324) (0.587)
Num.Obs. 3867 2624 407 361 363 62
AIC 16435.4 9908.8 1968.0 1585.7 1651.3 179.3
BIC 16479.2 9949.9 2000.1 1613.0 1678.6 196.3
Log.Lik. -8210.710 -4947.414 -976.018 -785.870 -818.655 -81.651
RMSE 3606.71 1026996.98 47.50 10.63 200800.40 1.78
Std.Errors Custom Custom Custom Custom Custom Custom

Party x gender interaction model

# Plot the predictions in `preds`, distinguishing groups by point shape
plot(preds) +
  aes(shape = group, color = NULL) +  # Use shapes instead of color
  scale_shape_manual(values = c(16, 17)) +  # Optional: custom shapes for gender
  # Draw both groups in black and hide the colour legend
  scale_color_manual(values = c("black", "black"), guide = "none") +
  labs(
    title = "Predicted Counts of Environmental Speeches",
    x = "Party",
    # Axis label now agrees with the title: the values are predicted counts,
    # not probabilities
    y = "Predicted Count of Environmental Speeches",
    shape = "Gender"  # This controls the legend title
  ) +
  theme_minimal()
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Appendix

Parties in the dataset with n speakers

# Number of distinct speakers per party in AggregatedData, largest party first
table <- AggregatedData %>%
  group_by(party) %>%
  summarise(n_speakers = length(unique(speakername))) %>%
  arrange(-n_speakers)
print(table)
## # A tibble: 17 × 2
##    party                             n_speakers
##    <fct>                                  <int>
##  1 Conservative                             516
##  2 Labour                                   361
##  3 Liberal Democrat                          71
##  4 Scottish National Party                   66
##  5 Labour (Co-op)                            53
##  6 Independent                               38
##  7 Democratic Unionist Party                 12
##  8 Plaid Cymru                                5
##  9 Social Democratic & Labour Party           5
## 10 Alliance                                   2
## 11 Speaker                                    2
## 12 UK Independence Party                      2
## 13 Ulster Unionist Party                      2
## 14 Change UK - The Independent Group          1
## 15 Green Party                                1
## 16 Respect                                    1
## 17 The Independent Group for Change           1

1. Party fixed effects model

# Show the coefficient table (Term, Estimate, Conf.Low, Conf.High, p.value)
print(exp_df)
##                                                                                                Term
## (Intercept)                                                                             (Intercept)
## genderF                                                                                     genderF
## YearsExp                                                                                   YearsExp
## gvt_role                                                                                   gvt_role
## opp_role                                                                                   opp_role
## EnvCommittee                                                                           EnvCommittee
## DebateTopic                                                                             DebateTopic
## as.factor(party)Change UK - The Independent Group as.factor(party)Change UK - The Independent Group
## as.factor(party)Conservative                                           as.factor(party)Conservative
## as.factor(party)Democratic Unionist Party                 as.factor(party)Democratic Unionist Party
## as.factor(party)Green Party                                             as.factor(party)Green Party
## as.factor(party)Independent                                             as.factor(party)Independent
## as.factor(party)Labour                                                       as.factor(party)Labour
## as.factor(party)Labour (Co-op)                                       as.factor(party)Labour (Co-op)
## as.factor(party)Liberal Democrat                                   as.factor(party)Liberal Democrat
## as.factor(party)Plaid Cymru                                             as.factor(party)Plaid Cymru
## as.factor(party)Respect                                                     as.factor(party)Respect
## as.factor(party)Scottish National Party                     as.factor(party)Scottish National Party
## as.factor(party)Social Democratic & Labour Party   as.factor(party)Social Democratic & Labour Party
## as.factor(party)Speaker                                                     as.factor(party)Speaker
## as.factor(party)The Independent Group for Change   as.factor(party)The Independent Group for Change
## as.factor(party)UK Independence Party                         as.factor(party)UK Independence Party
## as.factor(party)Ulster Unionist Party                         as.factor(party)Ulster Unionist Party
##                                                       Estimate   Conf.Low
## (Intercept)                                       1.215220e+00 0.43273861
## genderF                                           1.172324e+00 1.08517788
## YearsExp                                          1.000864e+00 0.99703059
## gvt_role                                          1.453756e+00 1.32673061
## opp_role                                          1.277730e+00 1.15900941
## EnvCommittee                                      1.844134e+00 1.69363885
## DebateTopic                                       1.087197e+00 1.08292902
## as.factor(party)Change UK - The Independent Group 5.601592e-17 0.00000000
## as.factor(party)Conservative                      1.568014e+00 0.55775036
## as.factor(party)Democratic Unionist Party         1.045757e+00 0.35580921
## as.factor(party)Green Party                       8.254181e+00 2.28787724
## as.factor(party)Independent                       6.070002e-01 0.19933410
## as.factor(party)Labour                            1.135508e+00 0.40329974
## as.factor(party)Labour (Co-op)                    1.584290e+00 0.55755612
## as.factor(party)Liberal Democrat                  1.693814e+00 0.59693392
## as.factor(party)Plaid Cymru                       1.794261e+00 0.58028250
## as.factor(party)Respect                           6.569717e-17 0.00000000
## as.factor(party)Scottish National Party           1.712143e+00 0.60291734
## as.factor(party)Social Democratic & Labour Party  9.134319e-01 0.28299085
## as.factor(party)Speaker                           8.449123e-01 0.18345911
## as.factor(party)The Independent Group for Change  2.138425e+00 0.10974851
## as.factor(party)UK Independence Party             8.750505e-01 0.15073783
## as.factor(party)Ulster Unionist Party             1.370578e-01 0.01174652
##                                                   Conf.High      p.value
## (Intercept)                                        3.412591 7.113795e-01
## genderF                                            1.266468 5.480931e-05
## YearsExp                                           1.004713 6.590256e-01
## gvt_role                                           1.592944 1.054767e-15
## opp_role                                           1.408612 8.403061e-07
## EnvCommittee                                       2.008002 4.354387e-45
## DebateTopic                                        1.091481 0.000000e+00
## as.factor(party)Change UK - The Independent Group       Inf 9.999996e-01
## as.factor(party)Conservative                       4.408189 3.937104e-01
## as.factor(party)Democratic Unionist Party          3.073582 9.351725e-01
## as.factor(party)Green Party                       29.779356 1.263325e-03
## as.factor(party)Independent                        1.848401 3.795681e-01
## as.factor(party)Labour                             3.197070 8.098550e-01
## as.factor(party)Labour (Co-op)                     4.501746 3.878240e-01
## as.factor(party)Liberal Democrat                   4.806238 3.220030e-01
## as.factor(party)Plaid Cymru                        5.547941 3.100991e-01
## as.factor(party)Respect                                 Inf 9.999991e-01
## as.factor(party)Scottish National Party            4.862080 3.125853e-01
## as.factor(party)Social Democratic & Labour Party   2.948356 8.796211e-01
## as.factor(party)Speaker                            3.891204 8.287766e-01
## as.factor(party)The Independent Group for Change  41.666747 6.159155e-01
## as.factor(party)UK Independence Party              5.079769 8.817549e-01
## as.factor(party)Ulster Unionist Party              1.599183 1.128703e-01
# Chi-squared goodness-of-fit test on the model deviance: p > 0.05 = good fit
p_value <- pchisq(
  PartyFixedEffects$deviance,
  PartyFixedEffects$df.residual,
  lower.tail = FALSE
)
p_value # 0.058 = bigger than 0.05 = good.
## [1] 0.05810485

Testing whether zero inflated negative binomial is necessary using MASS package

library(MASS)
# Negative binomial model of EnvDummy on AggregatedData
nb_model <- glm.nb(
  EnvDummy ~ gender + YearsExp + gvt_role + opp_role + EnvCommittee +
    RulingParty + DebateTopic,
  data = AggregatedData,
  control = glm.control(maxit = 100)
)

# Probability of a zero count for each observation under the fitted NB model,
# using the estimated theta and the fitted means
predicted_zeros <- dnbinom(
  0,
  size = nb_model[["theta"]],
  mu = predict(nb_model, type = "response")
)

# Observed share of zeros vs. the mean predicted zero probability
observed_zeros <- mean(AggregatedData[["EnvDummy"]] == 0)
cat(
  "Observed zeros:", observed_zeros,
  "\nExpected zeros under NB:", mean(predicted_zeros)
)
## Observed zeros: 0.3707922 
## Expected zeros under NB: 0.3929188