The models in the paper primarily use the INC's vote share (`Vote_Share_Percentage`) as the dependent variable (DV). Some later models instead use a binary indicator of whether the INC won the seat as the DV.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(ggplot2)
library(purrr)
library(tidyr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Read the combined all-states general-election file from the working
# directory; the 2019 and 2014 subsets used below are filtered out of it.
compdata <- readr::read_csv(file = "TCPD_GE_All_States_2024-7-18.csv")
## Rows: 91669 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): State_Name, Candidate, Sex, Party, Candidate_Type, Constituency_Na...
## dbl (20): Assembly_No, Constituency_No, Year, month, Poll_No, DelimID, Posit...
## lgl (6): last_poll, Same_Constituency, Same_Party, Turncoat, Incumbent, Rec...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the 2019 rows of the combined file.
data2019 <- dplyr::filter(compdata, Year == 2019)
# The 2024 results are read from their own file rather than from compdata.
data2024 <- readr::read_csv(file = "scrape_row.csv")
## Rows: 8902 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): State_Code, State_Name, idi, Constituency_Name, Constituency_Type,...
## dbl (31): Constituency_No, Position, Vote_Share_Percentage, Margin_Percentag...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise the 2024 labels in one pass: the two national parties are shortened
# to "INC" and "BJP", and two state names are rewritten. Every other value is
# carried through unchanged.
data2024 <- data2024 %>%
  mutate(
    Party = case_when(
      Party == "Indian National Congress" ~ "INC",
      Party == "Bharatiya Janata Party" ~ "BJP",
      TRUE ~ Party
    ),
    State_Name = case_when(
      State_Name == "Jammu_and_Kashmir" ~ "Jammu_&_Kashmir",
      State_Name == "NCT_OF_Delhi" ~ "Delhi",
      TRUE ~ State_Name
    )
  )
# Identify the constituencies contested by both parties: every row of a
# constituency gets "Yes" when it has an INC row and a BJP row, "No" otherwise.
# NOTE(review): constituencies are keyed by name alone here, so this assumes
# the 2024 names are unique across states -- confirm.
data2024 <- data2024 %>%
  mutate(
    both_inc_bjp_contested = if_else(
      any(Party == "INC") & any(Party == "BJP"),
      "Yes",
      "No"
    ),
    .by = Constituency_Name
  )
# Seat-sharing flag: rows from the states listed below, where the INC contested
# under an agreed seat-sharing arrangement, get "yes"; all other rows get "no".
data2024 <- data2024 %>%
  mutate(
    Ensemble = if_else(
      State_Name %in% c(
        "Tamil_Nadu", "Bihar", "Jharkhand",
        "Uttar_Pradesh", "Maharashtra", "Delhi"
      ),
      "yes",
      "no"
    )
  )
# Competitiveness flag: "Yes" for every row of a constituency in which both the
# INC row and the BJP row have Position <= ENOP, i.e. both parties were in
# contention for the seat; "No" otherwise.
# NOTE(review): ENOP is presumably the effective number of parties -- confirm.
data2024 <- data2024 %>%
  mutate(
    both_inc_bjp_below_enop = if_else(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    ),
    .by = Constituency_Name
  )
# Turnout-increase flag: "yes" when the 2024 turnout is strictly greater than
# the 2019 turnout, "no" otherwise, and NA when either value is missing.
data2024 <- mutate(
  data2024,
  turnouthigh = if_else(turnout_2024 > turnout2019, "yes", "no")
)
# Was the contest close? First copy the margin of the Position == 1 row onto
# every row of its constituency, then flag margins of 5 or less as close.
data2024 <- data2024 %>%
  mutate(
    Topcandimargin = Margin_Percentage[Position == 1],
    .by = Constituency_Name
  ) %>%
  mutate(closeconsti = if_else(Topcandimargin <= 5, "yes", "no"))
# Finally, keep only the INC rows, i.e. the constituencies the INC contested.
data2024INC <- dplyr::filter(data2024, Party == "INC")
# Baseline 2024 model: INC vote share regressed on 2024 turnout alone.
model1 <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024,
  data = data2024INC
)
summary(model1)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ turnout_2024, data = data2024INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.543 -9.887 2.483 10.542 36.432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.51401 6.05659 11.147 < 2e-16 ***
## turnout_2024 -0.47738 0.08752 -5.455 9.71e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.85 on 326 degrees of freedom
## Multiple R-squared: 0.08364, Adjusted R-squared: 0.08083
## F-statistic: 29.75 on 1 and 326 DF, p-value: 9.711e-08
# The 2024 model again, now with controls: constituency type, the
# turnout-increase flag, the INC/BJP competitiveness flag, the seat-sharing
# flag and the winner's margin.
model1wc <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024 + Constituency_Type +
    turnouthigh + both_inc_bjp_below_enop + Ensemble + Topcandimargin,
  data = data2024INC
)
summary(model1wc)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ turnout_2024 + Constituency_Type +
## turnouthigh + both_inc_bjp_below_enop + Ensemble + Topcandimargin,
## data = data2024INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.915 -9.389 -0.044 7.873 38.382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.41301 7.43045 5.843 1.32e-08 ***
## turnout_2024 -0.28445 0.09629 -2.954 0.00338 **
## Constituency_TypeSC 1.05074 2.05985 0.510 0.61035
## Constituency_TypeST 5.09973 2.42680 2.101 0.03642 *
## turnouthighyes 0.58616 1.59735 0.367 0.71390
## both_inc_bjp_below_enopYes 13.18322 1.77781 7.415 1.21e-12 ***
## Ensembleyes 6.55679 1.98726 3.299 0.00108 **
## Topcandimargin 0.01481 0.07135 0.208 0.83565
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.98 on 305 degrees of freedom
## (15 observations deleted due to missingness)
## Multiple R-squared: 0.3231, Adjusted R-squared: 0.3075
## F-statistic: 20.8 on 7 and 305 DF, p-value: < 2.2e-16
# Did close contests matter for the party's vote share?
# closeconsti (winner's margin of 5 or less) is already a column of data2024,
# so the INC subset is simply rebuilt from it before fitting.
data2024INC <- dplyr::filter(data2024, Party == "INC")
model2 <- lm(
  formula = Vote_Share_Percentage ~ closeconsti,
  data = data2024INC
)
summary(model2)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ closeconsti, data = data2024INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.600 -6.965 4.119 10.427 31.805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.3651 0.9681 35.499 <2e-16 ***
## closeconstiyes 1.8933 2.0662 0.916 0.36
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.49 on 326 degrees of freedom
## Multiple R-squared: 0.002569, Adjusted R-squared: -0.0004907
## F-statistic: 0.8396 on 1 and 326 DF, p-value: 0.3602
## INC models for 2019
# Same controls as for 2024. Constituency_Name is the grouping key below, so
# the listed state/name pairs are first given distinct labels (presumably
# because those names also occur in another state).
# NOTE(review): the Himachal_Pradesh seat is labelled "UK Hamirpur". The label
# only has to be unique here, but confirm it matches any table joined on it.
data2019 <- data2019 %>%
  mutate(
    Constituency_Name = case_when(
      State_Name == "Uttar_Pradesh" & Constituency_Name == "HAMIRPUR" ~
        "UP Hamirpur",
      State_Name == "Himachal_Pradesh" & Constituency_Name == "HAMIRPUR" ~
        "UK Hamirpur",
      State_Name == "Maharashtra" & Constituency_Name == "AURANGABAD" ~
        "MH Aurangabad",
      State_Name == "Bihar" & Constituency_Name == "MAHARAJGANJ" ~
        "Maharajganj Bihar",
      TRUE ~ Constituency_Name
    )
  ) %>%
  # Drop the SATARA rows recorded for month 4; every other row is kept.
  # NOTE(review): presumably SATARA also has rows from a second poll in 2019
  # and those are the ones retained -- confirm that is the intended poll.
  filter(Constituency_Name != "SATARA" | month != 4)
# Per-constituency controls for 2019, computed in one grouped pass:
#   Topcandimargin          - margin of the Position == 1 row, copied to
#                             every row of the constituency
#   both_inc_bjp_below_enop - "Yes" when both the INC row and the BJP row have
#                             Position <= ENOP, "No" otherwise
data2019 <- data2019 %>%
  group_by(Constituency_Name) %>%
  mutate(
    Topcandimargin = Margin_Percentage[Position == 1],
    both_inc_bjp_below_enop = if_else(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    )
  ) %>%
  ungroup()
# Keep the INC rows of 2019 and fit the baseline model: INC vote share
# regressed on turnout alone.
data2019INC <- dplyr::filter(data2019, Party == "INC")
model3 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2019INC
)
summary(model3)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage, data = data2019INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.395 -18.951 4.332 13.558 40.054
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.72308 5.57145 5.155 3.91e-07 ***
## Turnout_Percentage -0.05113 0.07984 -0.640 0.522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.73 on 420 degrees of freedom
## Multiple R-squared: 0.0009754, Adjusted R-squared: -0.001403
## F-statistic: 0.4101 on 1 and 420 DF, p-value: 0.5223
# The 2019 model with controls: constituency type, the INC/BJP competitiveness
# flag and the winner's margin. The turnout-increase and seat-sharing flags
# used for 2024 are not part of this specification.
model3wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
    both_inc_bjp_below_enop + Topcandimargin,
  data = data2019INC
)
summary(model3wc)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
## both_inc_bjp_below_enop + Topcandimargin, data = data2019INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.119 -12.622 -1.838 8.102 44.504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.790714 5.262568 3.191 0.00153 **
## Turnout_Percentage 0.004874 0.070523 0.069 0.94494
## Constituency_TypeSC -2.217034 1.892100 -1.172 0.24198
## Constituency_TypeST 1.580817 2.486159 0.636 0.52523
## both_inc_bjp_below_enopYes 17.488779 1.468430 11.910 < 2e-16 ***
## Topcandimargin 0.075475 0.058212 1.297 0.19551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.22 on 416 degrees of freedom
## Multiple R-squared: 0.2845, Adjusted R-squared: 0.2759
## F-statistic: 33.09 on 5 and 416 DF, p-value: < 2.2e-16
## The same exercise for 2014
# Take the 2014 rows, give the listed state/name pairs distinct labels (the
# same relabelling as for 2019), and drop the month-4 rows of four
# constituencies.
# NOTE(review): presumably BEED, KANDHAMAL, MEDAK and VADODARA each have rows
# from a second poll in 2014 and those are the ones retained -- confirm that is
# the intended poll.
data2014 <- compdata %>%
  filter(Year == 2014) %>%
  mutate(
    Constituency_Name = case_when(
      State_Name == "Uttar_Pradesh" & Constituency_Name == "HAMIRPUR" ~
        "UP Hamirpur",
      State_Name == "Himachal_Pradesh" & Constituency_Name == "HAMIRPUR" ~
        "UK Hamirpur",
      State_Name == "Maharashtra" & Constituency_Name == "AURANGABAD" ~
        "MH Aurangabad",
      State_Name == "Bihar" & Constituency_Name == "MAHARAJGANJ" ~
        "Maharajganj Bihar",
      TRUE ~ Constituency_Name
    )
  ) %>%
  # A row survives only if it passes all four conditions.
  filter(
    !(Constituency_Name == "BEED" & month == 4),
    !(Constituency_Name == "KANDHAMAL" & month == 4),
    !(Constituency_Name == "MEDAK" & month == 4),
    !(Constituency_Name == "VADODARA" & month == 4)
  )
# Per-constituency controls for 2014, computed in one grouped pass:
#   both_inc_bjp_below_enop - "Yes" when both the INC row and the BJP row have
#                             Position <= ENOP, "No" otherwise
#   Topcandimargin          - margin of the Position == 1 row, copied to
#                             every row of the constituency
data2014 <- data2014 %>%
  group_by(Constituency_Name) %>%
  mutate(
    both_inc_bjp_below_enop = if_else(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    ),
    Topcandimargin = Margin_Percentage[Position == 1]
  ) %>%
  ungroup()
# Keep the INC rows of 2014 and fit the baseline model: INC vote share
# regressed on turnout alone.
data2014INC <- dplyr::filter(data2014, Party == "INC")
model4 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2014INC
)
summary(model4)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage, data = data2014INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.58 -16.73 2.92 12.72 37.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.11328 4.96048 7.280 1.46e-12 ***
## Turnout_Percentage -0.18964 0.07171 -2.645 0.00846 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.34 on 459 degrees of freedom
## Multiple R-squared: 0.01501, Adjusted R-squared: 0.01286
## F-statistic: 6.994 on 1 and 459 DF, p-value: 0.008461
# The 2014 model with the same controls as the 2019 one: constituency type,
# the INC/BJP competitiveness flag and the winner's margin.
model4wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
    both_inc_bjp_below_enop + Topcandimargin,
  data = data2014INC
)
summary(model4wc)
##
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
## both_inc_bjp_below_enop + Topcandimargin, data = data2014INC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.624 -9.343 -2.022 6.745 40.068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63255 4.57680 3.634 0.000311 ***
## Turnout_Percentage -0.01737 0.05931 -0.293 0.769714
## Constituency_TypeSC 0.34863 1.49319 0.233 0.815496
## Constituency_TypeST 3.69159 1.96716 1.877 0.061211 .
## both_inc_bjp_below_enopYes 19.45801 1.15804 16.802 < 2e-16 ***
## Topcandimargin -0.15483 0.05586 -2.772 0.005800 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.74 on 455 degrees of freedom
## Multiple R-squared: 0.4283, Adjusted R-squared: 0.4221
## F-statistic: 68.19 on 5 and 455 DF, p-value: < 2.2e-16
library(broom)
# Collect the six fitted models under their own names so each coefficient
# table can be tagged with the model it came from.
models <- list(
  model1 = model1,
  model1wc = model1wc,
  model3 = model3,
  model3wc = model3wc,
  model4 = model4,
  model4wc = model4wc
)
# One long data frame: the tidy() coefficient table of every model stacked on
# top of each other, with a `model` column holding the model's name.
tidy_models <- bind_rows(lapply(names(models), function(name) {
  tidy(models[[name]]) %>% mutate(model = name)
}))
# View() opens a data viewer and is only meaningful in an interactive session;
# skip it when the script is rendered or run in batch.
if (interactive()) {
  View(tidy_models)
}
# That doesn't look right.
# Let me try stargazer instead.
# Generate the stargazer table
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Load necessary library
library(stargazer)
# Fit the six regression models for the table. The specifications are the same
# as the fits made earlier in the script.
# 2024: baseline and with controls
model1 <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024,
  data = data2024INC
)
model1wc <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024 + Constituency_Type +
    turnouthigh + both_inc_bjp_below_enop + Ensemble + Topcandimargin,
  data = data2024INC
)
# 2019: baseline and with controls
model3 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2019INC
)
model3wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
    both_inc_bjp_below_enop + Topcandimargin,
  data = data2019INC
)
# 2014: baseline and with controls
model4 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2014INC
)
model4wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type +
    both_inc_bjp_below_enop + Topcandimargin,
  data = data2014INC
)
# Render the six models side by side as an HTML regression table.
#
# stargazer assigns covariate.labels to coefficient rows purely by position, in
# the order the terms first appear across the models, and a factor contributes
# one row per non-reference level. Constituency_Type therefore needs two labels
# (SC and ST). With a single label every later label moved up one row, so that
# e.g. the competitiveness flag was printed as "Ensemble" and the 2019/2014
# turnout coefficient as "Intercept". The labels below follow the row order
# turnout_2024, Constituency_TypeSC, Constituency_TypeST, turnouthigh,
# both_inc_bjp_below_enop, Ensemble, Topcandimargin, Turnout_Percentage.
# The intercept row keeps stargazer's default label, "Constant".
stargazer(model1, model1wc, model3, model3wc, model4, model4wc,
  type = "html",
  title = "Regression Results",
  align = TRUE,
  no.space = TRUE,
  column.labels = c(
    "Model 1", "Model 1 WC", "Model 3", "Model 3 WC", "Model 4", "Model 4 WC"
  ),
  covariate.labels = c(
    "Turnout 2024",
    "Constituency Type: SC",
    "Constituency Type: ST",
    "Turnout High",
    "Both INC BJP Below ENOP",
    "Ensemble",
    "Top Candidate Margin",
    "Turnout Percentage"
  ),
  dep.var.labels.include = FALSE,
  model.names = FALSE
)
##
## <table style="text-align:center"><caption><strong>Regression Results</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 1 WC</td><td>Model 3</td><td>Model 3 WC</td><td>Model 4</td><td>Model 4 WC</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Turnout 2024</td><td>-0.477<sup>***</sup></td><td>-0.284<sup>***</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.088)</td><td>(0.096)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constituency Type: SC</td><td></td><td>1.051</td><td></td><td>-2.217</td><td></td><td>0.349</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(2.060)</td><td></td><td>(1.892)</td><td></td><td>(1.493)</td></tr>
## <tr><td style="text-align:left">Constituency Type: ST</td><td></td><td>5.100<sup>**</sup></td><td></td><td>1.581</td><td></td><td>3.692<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(2.427)</td><td></td><td>(2.486)</td><td></td><td>(1.967)</td></tr>
## <tr><td style="text-align:left">Turnout High</td><td></td><td>0.586</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.597)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Both INC BJP Below ENOP</td><td></td><td>13.183<sup>***</sup></td><td></td><td>17.489<sup>***</sup></td><td></td><td>19.458<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.778)</td><td></td><td>(1.468)</td><td></td><td>(1.158)</td></tr>
## <tr><td style="text-align:left">Ensemble</td><td></td><td>6.557<sup>***</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.987)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Top Candidate Margin</td><td></td><td>0.015</td><td></td><td>0.075</td><td></td><td>-0.155<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.071)</td><td></td><td>(0.058)</td><td></td><td>(0.056)</td></tr>
## <tr><td style="text-align:left">Turnout Percentage</td><td></td><td></td><td>-0.051</td><td>0.005</td><td>-0.190<sup>***</sup></td><td>-0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.080)</td><td>(0.071)</td><td>(0.072)</td><td>(0.059)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>67.514<sup>***</sup></td><td>43.413<sup>***</sup></td><td>28.723<sup>***</sup></td><td>16.791<sup>***</sup></td><td>36.113<sup>***</sup></td><td>16.633<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(6.057)</td><td>(7.430)</td><td>(5.571)</td><td>(5.263)</td><td>(4.960)</td><td>(4.577)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>328</td><td>313</td><td>422</td><td>422</td><td>461</td><td>461</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.084</td><td>0.323</td><td>0.001</td><td>0.285</td><td>0.015</td><td>0.428</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.081</td><td>0.308</td><td>-0.001</td><td>0.276</td><td>0.013</td><td>0.422</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>14.846 (df = 326)</td><td>12.978 (df = 305)</td><td>16.725 (df = 420)</td><td>14.222 (df = 416)</td><td>15.344 (df = 459)</td><td>11.740 (df = 455)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>29.755<sup>***</sup> (df = 1; 326)</td><td>20.796<sup>***</sup> (df = 7; 305)</td><td>0.410 (df = 1; 420)</td><td>33.089<sup>***</sup> (df = 5; 416)</td><td>6.994<sup>***</sup> (df = 1; 459)</td><td>68.187<sup>***</sup> (df = 5; 455)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>