modelINCpaper

Models for the paper

The models in the paper primarily use the INC's vote share (`Vote_Share_Percentage`) as the dependent variable (DV). Some later models instead use a binary indicator of whether the INC won the seat as the DV.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr)
library(dplyr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(ggplot2)
library(purrr)
library(tidyr)
library(gridExtra)

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

# Load the TCPD general-election results file; it is subset by Year further
# down to obtain the 2019 and 2014 rows.
compdata <- readr::read_csv(file = "TCPD_GE_All_States_2024-7-18.csv")

## Rows: 91669 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): State_Name, Candidate, Sex, Party, Candidate_Type, Constituency_Na...
## dbl (20): Assembly_No, Constituency_No, Year, month, Poll_No, DelimID, Posit...
## lgl  (6): last_poll, Same_Constituency, Same_Party, Turncoat, Incumbent, Rec...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Rows of the TCPD file whose Year is 2019.
data2019 <- dplyr::filter(compdata, Year == 2019)

# The 2024 results are read from a separate file.
data2024 <- readr::read_csv(file = "scrape_row.csv")

## Rows: 8902 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): State_Code, State_Name, idi, Constituency_Name, Constituency_Type,...
## dbl (31): Constituency_No, Position, Vote_Share_Percentage, Margin_Percentag...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Harmonise labels in the 2024 data: the two national parties are replaced by
# their abbreviations and two state names are recoded (presumably to match the
# spellings used in the TCPD file -- confirm). All other values pass through.
data2024 <- data2024 %>%
  mutate(
    Party = case_when(
      Party == "Indian National Congress" ~ "INC",
      Party == "Bharatiya Janata Party" ~ "BJP",
      TRUE ~ Party
    ),
    State_Name = case_when(
      State_Name == "Jammu_and_Kashmir" ~ "Jammu_&_Kashmir",
      State_Name == "NCT_OF_Delhi" ~ "Delhi",
      TRUE ~ State_Name
    )
  )

Now let us construct some control variables

# Identify the constituencies in which both parties (BJP and INC) fielded a
# candidate.
# Rows are grouped by state AND constituency name: constituency names alone
# are not unique nationally (the 2019/2014 preparation in this file has to
# disambiguate HAMIRPUR, AURANGABAD and MAHARAJGANJ for exactly that reason),
# and grouping on the name only would silently pool same-named seats.
data2024 <- data2024 %>%
  group_by(State_Name, Constituency_Name) %>%
  mutate(
    both_inc_bjp_contested = if_else(
      any(Party == "INC") & any(Party == "BJP"),
      "Yes",
      "No"
    )
  ) %>%
  ungroup()

# Flag the states where the INC contested under an agreed seat-sharing
# arrangement. "Delhi" is the recoded spelling of "NCT_OF_Delhi" applied to
# State_Name earlier in this file.
data2024 <- mutate(
  data2024,
  Ensemble = ifelse(
    State_Name %in% c(
      "Tamil_Nadu", "Bihar", "Jharkhand",
      "Uttar_Pradesh", "Maharashtra", "Delhi"
    ),
    "yes",
    "no"
  )
)

# Flag the constituencies in which both the BJP and the INC were competitive,
# i.e. each has a candidate whose Position is at or below the constituency's
# ENOP value (presumably the effective number of parties -- confirm).
# Rows are grouped by state AND constituency name because constituency names
# alone are not unique nationally; grouping on the name only would pool
# same-named seats from different states.
data2024 <- data2024 %>%
  group_by(State_Name, Constituency_Name) %>%
  mutate(
    both_inc_bjp_below_enop = if_else(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    )
  ) %>%
  ungroup()

# Flag rows whose 2024 turnout is strictly higher than the 2019 turnout.
# A missing value in either turnout column yields NA rather than "no".
data2024 <- mutate(
  data2024,
  turnouthigh = ifelse(turnout_2024 > turnout2019, "yes", "no")
)

# Was the contest close?
# Topcandimargin copies the Margin_Percentage of the first-placed candidate
# (Position == 1) onto every row of that constituency. Rows are grouped by
# state AND constituency name because constituency names alone are not unique
# nationally; with name-only grouping two same-named seats would be pooled and
# the Position == 1 lookup would return more than one value.
data2024 <- data2024 %>%
  group_by(State_Name, Constituency_Name) %>%
  mutate(Topcandimargin = Margin_Percentage[Position == 1]) %>%
  ungroup()

# A constituency counts as close when that margin is at most 5
# (presumably percentage points, given the column name -- confirm).
data2024 <- data2024 %>%
  mutate(closeconsti = if_else(Topcandimargin <= 5, "yes", "no"))

# Finally, keep only the INC rows, i.e. the constituencies the INC contested.
data2024INC <- filter(data2024, Party == "INC")

# Bivariate model: INC vote share in 2024 regressed on 2024 turnout.
model1 <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024,
  data = data2024INC
)
summary(model1)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ turnout_2024, data = data2024INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.543  -9.887   2.483  10.542  36.432 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  67.51401    6.05659  11.147  < 2e-16 ***
## turnout_2024 -0.47738    0.08752  -5.455 9.71e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.85 on 326 degrees of freedom
## Multiple R-squared:  0.08364,    Adjusted R-squared:  0.08083 
## F-statistic: 29.75 on 1 and 326 DF,  p-value: 9.711e-08

# Same 2024 model with controls added: constituency type, the turnout-increase
# flag, the INC/BJP-both-within-ENOP flag, the seat-sharing flag and the
# first-placed candidate's margin.
model1wc <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024 +
    Constituency_Type +
    turnouthigh +
    both_inc_bjp_below_enop +
    Ensemble +
    Topcandimargin,
  data = data2024INC
)

summary(model1wc)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ turnout_2024 + Constituency_Type + 
##     turnouthigh + both_inc_bjp_below_enop + Ensemble + Topcandimargin, 
##     data = data2024INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.915  -9.389  -0.044   7.873  38.382 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                43.41301    7.43045   5.843 1.32e-08 ***
## turnout_2024               -0.28445    0.09629  -2.954  0.00338 ** 
## Constituency_TypeSC         1.05074    2.05985   0.510  0.61035    
## Constituency_TypeST         5.09973    2.42680   2.101  0.03642 *  
## turnouthighyes              0.58616    1.59735   0.367  0.71390    
## both_inc_bjp_below_enopYes 13.18322    1.77781   7.415 1.21e-12 ***
## Ensembleyes                 6.55679    1.98726   3.299  0.00108 ** 
## Topcandimargin              0.01481    0.07135   0.208  0.83565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.98 on 305 degrees of freedom
##   (15 observations deleted due to missingness)
## Multiple R-squared:  0.3231, Adjusted R-squared:  0.3075 
## F-statistic:  20.8 on 7 and 305 DF,  p-value: < 2.2e-16

# Did close contests matter for the party's vote share?
# closeconsti was derived earlier from the first-placed candidate's margin,
# so here the INC subset is simply rebuilt from data2024 and the model fitted.

data2024INC <- filter(data2024, Party == "INC")

model2 <- lm(
  formula = Vote_Share_Percentage ~ closeconsti,
  data = data2024INC
)

summary(model2)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ closeconsti, data = data2024INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.600  -6.965   4.119  10.427  31.805 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     34.3651     0.9681  35.499   <2e-16 ***
## closeconstiyes   1.8933     2.0662   0.916     0.36    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.49 on 326 degrees of freedom
## Multiple R-squared:  0.002569,   Adjusted R-squared:  -0.0004907 
## F-statistic: 0.8396 on 1 and 326 DF,  p-value: 0.3602

## INC models for 2019
# The same controls are defined first, in a single pipeline.

data2019 <- data2019 %>%
  # Give same-named constituencies in different states distinct names so that
  # grouping by Constituency_Name below does not pool them.
  # NOTE(review): the Himachal_Pradesh seat is labelled "UK Hamirpur"; the
  # prefix does not match the state name -- confirm this label is intended.
  mutate(
    Constituency_Name = case_when(
      Constituency_Name == "HAMIRPUR" & State_Name == "Uttar_Pradesh" ~ "UP Hamirpur",
      Constituency_Name == "HAMIRPUR" & State_Name == "Himachal_Pradesh" ~ "UK Hamirpur",
      Constituency_Name == "AURANGABAD" & State_Name == "Maharashtra" ~ "MH Aurangabad",
      Constituency_Name == "MAHARAJGANJ" & State_Name == "Bihar" ~ "Maharajganj Bihar",
      TRUE ~ Constituency_Name
    )
  ) %>%
  # Drop the SATARA rows recorded with month == 4.
  # NOTE(review): presumably this leaves one poll per seat -- confirm which
  # poll (general election or by-election) the remaining rows belong to.
  filter(!(Constituency_Name == "SATARA" & month == 4)) %>%
  group_by(Constituency_Name) %>%
  mutate(
    # Margin of the first-placed candidate, copied to every row of the seat.
    Topcandimargin = Margin_Percentage[Position == 1],
    # "Yes" when both an INC and a BJP candidate have Position <= ENOP.
    both_inc_bjp_below_enop = ifelse(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    )
  ) %>%
  ungroup()

# INC rows only.
data2019INC <- filter(data2019, Party == "INC")

# Bivariate model: INC vote share in 2019 regressed on turnout.
model3 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2019INC
)
summary(model3)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage, data = data2019INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.395 -18.951   4.332  13.558  40.054 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        28.72308    5.57145   5.155 3.91e-07 ***
## Turnout_Percentage -0.05113    0.07984  -0.640    0.522    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.73 on 420 degrees of freedom
## Multiple R-squared:  0.0009754,  Adjusted R-squared:  -0.001403 
## F-statistic: 0.4101 on 1 and 420 DF,  p-value: 0.5223

# 2019 model with controls: constituency type, the INC/BJP-both-within-ENOP
# flag and the first-placed candidate's margin.
model3wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage +
    Constituency_Type +
    both_inc_bjp_below_enop +
    Topcandimargin,
  data = data2019INC
)

summary(model3wc)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type + 
##     both_inc_bjp_below_enop + Topcandimargin, data = data2019INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.119 -12.622  -1.838   8.102  44.504 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                16.790714   5.262568   3.191  0.00153 ** 
## Turnout_Percentage          0.004874   0.070523   0.069  0.94494    
## Constituency_TypeSC        -2.217034   1.892100  -1.172  0.24198    
## Constituency_TypeST         1.580817   2.486159   0.636  0.52523    
## both_inc_bjp_below_enopYes 17.488779   1.468430  11.910  < 2e-16 ***
## Topcandimargin              0.075475   0.058212   1.297  0.19551    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.22 on 416 degrees of freedom
## Multiple R-squared:  0.2845, Adjusted R-squared:  0.2759 
## F-statistic: 33.09 on 5 and 416 DF,  p-value: < 2.2e-16

## INC models for 2014, for comparison

data2014 <- compdata %>%
  filter(Year == 2014) %>%
  # Give same-named constituencies in different states distinct names so that
  # grouping by Constituency_Name below does not pool them.
  # NOTE(review): the Himachal_Pradesh seat is labelled "UK Hamirpur"; the
  # prefix does not match the state name -- confirm this label is intended.
  mutate(
    Constituency_Name = case_when(
      Constituency_Name == "HAMIRPUR" & State_Name == "Uttar_Pradesh" ~ "UP Hamirpur",
      Constituency_Name == "HAMIRPUR" & State_Name == "Himachal_Pradesh" ~ "UK Hamirpur",
      Constituency_Name == "AURANGABAD" & State_Name == "Maharashtra" ~ "MH Aurangabad",
      Constituency_Name == "MAHARAJGANJ" & State_Name == "Bihar" ~ "Maharajganj Bihar",
      TRUE ~ Constituency_Name
    )
  ) %>%
  # Drop the month == 4 rows of four seats; a row must pass every condition.
  # NOTE(review): presumably this leaves one poll per seat -- confirm which
  # poll (general election or by-election) the remaining rows belong to.
  filter(
    !(Constituency_Name == "BEED" & month == 4),
    !(Constituency_Name == "KANDHAMAL" & month == 4),
    !(Constituency_Name == "MEDAK" & month == 4),
    !(Constituency_Name == "VADODARA" & month == 4)
  ) %>%
  group_by(Constituency_Name) %>%
  mutate(
    # "Yes" when both an INC and a BJP candidate have Position <= ENOP.
    both_inc_bjp_below_enop = ifelse(
      any(Party == "INC" & Position <= ENOP) &
        any(Party == "BJP" & Position <= ENOP),
      "Yes",
      "No"
    ),
    # Margin of the first-placed candidate, copied to every row of the seat.
    Topcandimargin = Margin_Percentage[Position == 1]
  ) %>%
  ungroup()

# INC rows only.
data2014INC <- filter(data2014, Party == "INC")

# Bivariate model: INC vote share in 2014 regressed on turnout.
model4 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2014INC
)
summary(model4)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage, data = data2014INC)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24.58 -16.73   2.92  12.72  37.50 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        36.11328    4.96048   7.280 1.46e-12 ***
## Turnout_Percentage -0.18964    0.07171  -2.645  0.00846 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.34 on 459 degrees of freedom
## Multiple R-squared:  0.01501,    Adjusted R-squared:  0.01286 
## F-statistic: 6.994 on 1 and 459 DF,  p-value: 0.008461

# 2014 model with controls: constituency type, the INC/BJP-both-within-ENOP
# flag and the first-placed candidate's margin.
model4wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage +
    Constituency_Type +
    both_inc_bjp_below_enop +
    Topcandimargin,
  data = data2014INC
)

summary(model4wc)

## 
## Call:
## lm(formula = Vote_Share_Percentage ~ Turnout_Percentage + Constituency_Type + 
##     both_inc_bjp_below_enop + Topcandimargin, data = data2014INC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.624  -9.343  -2.022   6.745  40.068 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                16.63255    4.57680   3.634 0.000311 ***
## Turnout_Percentage         -0.01737    0.05931  -0.293 0.769714    
## Constituency_TypeSC         0.34863    1.49319   0.233 0.815496    
## Constituency_TypeST         3.69159    1.96716   1.877 0.061211 .  
## both_inc_bjp_below_enopYes 19.45801    1.15804  16.802  < 2e-16 ***
## Topcandimargin             -0.15483    0.05586  -2.772 0.005800 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.74 on 455 degrees of freedom
## Multiple R-squared:  0.4283, Adjusted R-squared:  0.4221 
## F-statistic: 68.19 on 5 and 455 DF,  p-value: < 2.2e-16

library(broom)

# Collect the six fitted models, keyed by their object names.
models <- setNames(
  list(model1, model1wc, model3, model3wc, model4, model4wc),
  c("model1", "model1wc", "model3", "model3wc", "model4", "model4wc")
)

# One coefficient table per model, each tagged with the model's name in a
# trailing `model` column, stacked into a single data frame.
tidy_models <- models %>%
  imap(function(fit, label) mutate(tidy(fit), model = label)) %>%
  bind_rows()

View(tidy_models)
# That doesn't look right.
# Let me try stargazer instead
# and generate the table with it.
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# Load necessary library
library(stargazer)

# Fit the six regression models that go into the table
# (same specifications and data as the fits earlier in this file).

# 2024
model1 <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024,
  data = data2024INC
)
model1wc <- lm(
  formula = Vote_Share_Percentage ~ turnout_2024 +
    Constituency_Type +
    turnouthigh +
    both_inc_bjp_below_enop +
    Ensemble +
    Topcandimargin,
  data = data2024INC
)

# 2019
model3 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2019INC
)
model3wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage +
    Constituency_Type +
    both_inc_bjp_below_enop +
    Topcandimargin,
  data = data2019INC
)

# 2014
model4 <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage,
  data = data2014INC
)
model4wc <- lm(
  formula = Vote_Share_Percentage ~ Turnout_Percentage +
    Constituency_Type +
    both_inc_bjp_below_enop +
    Topcandimargin,
  data = data2014INC
)

# Generate the combined HTML regression table for the six models.
#
# covariate.labels is matched to the coefficient rows purely by position, so it
# must contain exactly one label per row, in the order the rows appear:
#   turnout_2024, Constituency_TypeSC, Constituency_TypeST, turnouthighyes,
#   both_inc_bjp_below_enopYes, Ensembleyes, Topcandimargin, Turnout_Percentage
# Constituency_Type is a factor and therefore contributes TWO rows (SC and ST).
# The intercept row is labelled "Constant" by stargazer itself and takes no
# entry here. Supplying a single "Constituency Type" label plus an "Intercept"
# label shifts every later label onto the wrong coefficient.
stargazer(model1, model1wc, model3, model3wc, model4, model4wc,
          type = "html",
          title = "Regression Results",
          align = TRUE,
          no.space = TRUE,
          column.labels = c("Model 1", "Model 1 WC", "Model 3", "Model 3 WC", "Model 4", "Model 4 WC"),
          covariate.labels = c(
            "Turnout 2024",
            "Constituency Type: SC",
            "Constituency Type: ST",
            "Turnout High",
            "Both INC BJP Below ENOP",
            "Ensemble",
            "Top Candidate Margin",
            "Turnout Percentage"
          ),
          dep.var.labels.include = FALSE,
          model.names = FALSE)

## 
## <table style="text-align:center"><caption><strong>Regression Results</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 1 WC</td><td>Model 3</td><td>Model 3 WC</td><td>Model 4</td><td>Model 4 WC</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Turnout 2024</td><td>-0.477<sup>***</sup></td><td>-0.284<sup>***</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td>(0.088)</td><td>(0.096)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constituency Type</td><td></td><td>1.051</td><td></td><td>-2.217</td><td></td><td>0.349</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(2.060)</td><td></td><td>(1.892)</td><td></td><td>(1.493)</td></tr>
## <tr><td style="text-align:left">Turnout High</td><td></td><td>5.100<sup>**</sup></td><td></td><td>1.581</td><td></td><td>3.692<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(2.427)</td><td></td><td>(2.486)</td><td></td><td>(1.967)</td></tr>
## <tr><td style="text-align:left">Both INC BJP Below ENOP</td><td></td><td>0.586</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.597)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Ensemble</td><td></td><td>13.183<sup>***</sup></td><td></td><td>17.489<sup>***</sup></td><td></td><td>19.458<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.778)</td><td></td><td>(1.468)</td><td></td><td>(1.158)</td></tr>
## <tr><td style="text-align:left">Top Candidate Margin</td><td></td><td>6.557<sup>***</sup></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(1.987)</td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Turnout Percentage</td><td></td><td>0.015</td><td></td><td>0.075</td><td></td><td>-0.155<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.071)</td><td></td><td>(0.058)</td><td></td><td>(0.056)</td></tr>
## <tr><td style="text-align:left">Intercept</td><td></td><td></td><td>-0.051</td><td>0.005</td><td>-0.190<sup>***</sup></td><td>-0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.080)</td><td>(0.071)</td><td>(0.072)</td><td>(0.059)</td></tr>
## <tr><td style="text-align:left">Constant</td><td>67.514<sup>***</sup></td><td>43.413<sup>***</sup></td><td>28.723<sup>***</sup></td><td>16.791<sup>***</sup></td><td>36.113<sup>***</sup></td><td>16.633<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(6.057)</td><td>(7.430)</td><td>(5.571)</td><td>(5.263)</td><td>(4.960)</td><td>(4.577)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>328</td><td>313</td><td>422</td><td>422</td><td>461</td><td>461</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.084</td><td>0.323</td><td>0.001</td><td>0.285</td><td>0.015</td><td>0.428</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.081</td><td>0.308</td><td>-0.001</td><td>0.276</td><td>0.013</td><td>0.422</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>14.846 (df = 326)</td><td>12.978 (df = 305)</td><td>16.725 (df = 420)</td><td>14.222 (df = 416)</td><td>15.344 (df = 459)</td><td>11.740 (df = 455)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>29.755<sup>***</sup> (df = 1; 326)</td><td>20.796<sup>***</sup> (df = 7; 305)</td><td>0.410 (df = 1; 420)</td><td>33.089<sup>***</sup> (df = 5; 416)</td><td>6.994<sup>***</sup> (df = 1; 459)</td><td>68.187<sup>***</sup> (df = 5; 455)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

modelINCpaper

Pawas

2024-07-31

Models for the paper

Now let us edit some controls