Starting with the updated tables

Summary Stats from 2024
| Campaign Type | Total Seats | BJP Seats Won | INC Seats Won | Avg INC Vote Share (%) | Avg BJP Vote Share (%) | Avg INC Vote Share Change (%) | Avg BJP Vote Share Change (%) | INC Strike Rate | BJP Strike Rate | Avg Turnout Change (%) |
|---|---|---|---|---|---|---|---|---|---|---|
| Modi Only | 148 | 73 | 21 | 34.08 | 45.38 | 5.08 | -2.14 | 0.14 | 0.49 | -1.06 |
| Rahul Only | 39 | 17 | 11 | 40.43 | 42.12 | 4.54 | -4.52 | 0.28 | 0.44 | -1.63 |
| Both | 34 | 19 | 10 | 38.42 | 44.22 | 9.69 | -1.89 | 0.29 | 0.56 | -0.46 |
| Neither | 322 | 131 | 57 | 33.52 | 43.95 | 3.36 | -1.93 | 0.18 | 0.41 | -2.30 |
| Total/Average | 543 | 240 | 99 | 34.76 | 44.26 | 4.45 | -2.17 | 0.18 | 0.44 | -1.79 |
Summary Stats from 2019
| Campaign Type | Total Seats | BJP Seats Won | INC Seats Won | Avg INC Vote Share (%) | Avg BJP Vote Share (%) | Avg INC Vote Share Change (%) | Avg BJP Vote Share Change (%) | INC Strike Rate | BJP Strike Rate | Avg Turnout Change (%) |
|---|---|---|---|---|---|---|---|---|---|---|
| Modi Only | 108 | 79 | 2 | 21.05 | 48.73 | -1.84 | 10.53 | 0.02 | 0.73 | 1.33 |
| Rahul Only | 71 | 49 | 11 | 32.26 | 49.14 | 3.65 | 8.71 | 0.15 | 0.69 | 1.71 |
| Both | 25 | 20 | 3 | 33.31 | 50.67 | 1.89 | 6.92 | 0.12 | 0.80 | 1.59 |
| Neither | 339 | 155 | 36 | 24.02 | 43.12 | 1.88 | 8.45 | 0.11 | 0.46 | 0.21 |
| Total/Average | 543 | 303 | 52 | 25.23 | 45.69 | 1.39 | 8.89 | 0.10 | 0.56 | 0.70 |
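| Person | For_Whom | 2019: GEN (411)(75.5%) | 2019: SC (84)(15.5%) | 2019: ST (48)(9%) | 2024: GEN (411)(75.5%) | 2024: SC (84)(15.5%) | 2024: ST (48)(9%) |
|---|---|---|---|---|---|---|---|
| Modi | Ally | 8 | 1 | 0 | 12 | 4 | 0 |
| Modi | Self | 94 | 17 | 13 | 122 | 24 | 17 |
| Rahul | Ally | 5 | 0 | 0 | 10 | 0 | 1 |
| Rahul | Self | 65 | 20 | 11 | 42 | 6 | 8 |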

Distance graphs

# Boxplots

Let us make some maps

## Reading layer `merged_data_gis' from data source 
##   `/Users/pawaspratikshit/Desktop/Congress Paper/merged_data_gis.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 543 features and 85 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 68.09348 ymin: 6.762522 xmax: 97.4115 ymax: 37.07761
## Geodetic CRS:  WGS 84

# models

# A simple model first

library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.3.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.3.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(sf)

# Build the spatial weights object `W`: make sure `merged_data` is a valid sf
# object, take one centroid per feature, link every feature to its 6 nearest
# centroids, and row-standardise the resulting neighbour list.
if (!inherits(merged_data, "sf")) {
  merged_data <- st_as_sf(merged_data)
}

# Repair invalid geometries before any geometric computation.
merged_data <- st_make_valid(merged_data)

coords <- st_centroid(st_geometry(merged_data))
coords_matrix <- st_coordinates(coords)

# knearneigh() treats a plain coordinate matrix as planar unless told
# otherwise. When the layer is in longitude/latitude, request great-circle
# distances so that "nearest" is not measured in raw degrees; for a projected
# (or CRS-less) layer this evaluates to FALSE and the planar search is kept.
coords_are_longlat <- isTRUE(st_is_longlat(merged_data))
nb <- knn2nb(knearneigh(coords_matrix, k = 6, longlat = coords_are_longlat))

# style = "W" row-standardises the weights; zero.policy = TRUE tolerates
# features that end up without neighbours.
W <- nb2listw(nb, style = "W", zero.policy = TRUE)

# Derived regressors and outcome for the spatial models.
#
# A missing rally indicator is treated as "no rally". The NAs are filled
# BEFORE anything is derived from the indicators so that the combined flag and
# the spatial lags are all computed from the same cleaned columns.
merged_data$rahul24rally[is.na(merged_data$rahul24rally)] <- 0
merged_data$modi24rally[is.na(merged_data$modi24rally)] <- 0

# anyrally24 is 1 when either leader held a rally, 0 otherwise. Each indicator
# is compared explicitly: in R `a | b == 1` parses as `a | (b == 1)`, because
# `==` binds tighter than `|`.
# NOTE(review): assumes both columns are 0/1 indicators; if they are rally
# counts, `> 0` is the comparison that matches "any rally" -- confirm.
merged_data <- merged_data %>%
  mutate(
    anyrally24 = as.numeric(rahul24rally == 1 | modi24rally == 1),
    # Outcome of the turnout models: 2024 turnout minus 2019 turnout.
    todiff19and24 = turnout_2024 - turnout2019
  )

# Spatial lags of the rally variables, i.e. weighted values over each
# feature's neighbours under W.
merged_data$anyrally24_lag <- lag.listw(
  W, merged_data$anyrally24,
  zero.policy = TRUE
)
merged_data$rahulrally24_lag <- lag.listw(
  W, merged_data$rahul24rally,
  zero.policy = TRUE
)
merged_data$modirally24_lag <- lag.listw(
  W, merged_data$modi24rally,
  zero.policy = TRUE
)

# Baseline spatial lag model: the 2019-to-2024 turnout difference
# (todiff19and24) regressed on the any-rally flag and its spatial lag, with
# spatial dependence in the outcome taken from the weights list W.
# zero.policy = TRUE lets the fit proceed when a region has no neighbours.
sar_model <- lagsarlm(todiff19and24 ~ anyrally24 + anyrally24_lag, 
                       data = merged_data, 
                       listw = W, 
                       zero.policy = TRUE)

# Print coefficients, the spatial parameter rho and the fit diagnostics.
summary(sar_model)
## 
## Call:lagsarlm(formula = todiff19and24 ~ anyrally24 + anyrally24_lag, 
##     data = merged_data, listw = W, zero.policy = TRUE)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -21.064752  -1.308721   0.008247   1.353958   9.394872 
## 
## Type: lag 
## Regions with no neighbours included:
##  38 464 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)    -1.21014    0.28629 -4.2270 2.368e-05
## anyrally24      0.83823    0.22746  3.6852 0.0002286
## anyrally24_lag  0.79440    0.53379  1.4882 0.1366884
## 
## Rho: 0.69705, LR test value: 230.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.038656
##     z-value: 18.032, p-value: < 2.22e-16
## Wald statistic: 325.16, p-value: < 2.22e-16
## 
## Log likelihood: -1258.1 for lag model
## ML residual variance (sigma squared): 6.5421, (sigma: 2.5578)
## Number of observations: 523 
## Number of parameters estimated: 5 
## AIC: 2526.2, (AIC for lm: 2754.7)
## LM test for residual autocorrelation
## test value: 38.196, p-value: 6.3983e-10
# Rebuild `nb` as queen-contiguity neighbours computed from the polygons.
# This overwrites the k-nearest-neighbour list created earlier; `W` was built
# from that earlier list and is not recomputed here.
nb <- poly2nb(merged_data, queen = TRUE)  
## Warning in poly2nb(merged_data, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(merged_data, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
# Compare state names as plain strings.
merged_data$ST_NAME <- as.character(merged_data$ST_NAME)

# boundary_constituency is 1 when at least one neighbour in `nb` belongs to a
# different state, 0 otherwise. A feature without neighbours yields an empty
# comparison and is therefore 0.
# vapply() pins the result to one number per feature, and seq_along() stays
# correct for an empty neighbour list.
merged_data$boundary_constituency <- vapply(
  seq_along(nb),
  function(i, states) {
    as.numeric(any(states[nb[[i]]] != states[i]))
  },
  numeric(1),
  states = merged_data$ST_NAME
)

# Label each feature with its 2024 polling phase, looked up from the election
# date (doe2024). Dates other than the seven listed polling days, and missing
# dates, are left as NA.
merged_data <- merged_data %>%
  mutate(
    Phase = unname(
      c(
        "2024-04-19" = "Phase 1",
        "2024-04-26" = "Phase 2",
        "2024-05-07" = "Phase 3",
        "2024-05-13" = "Phase 4",
        "2024-05-20" = "Phase 5",
        "2024-05-25" = "Phase 6",
        "2024-06-01" = "Phase 7"
      )[as.character(doe2024)]
    )
  )

# Map every feature, filling by the boundary flag: gray for 0, red for 1.
ggplot(data = merged_data) +
  geom_sf(mapping = aes(fill = as.factor(boundary_constituency))) +
  scale_fill_manual(values = c("0" = "gray", "1" = "red")) +
  labs(title = "Boundary Constituencies in India") +
  theme_minimal()

# Turnout model with controls: the baseline specification plus the
# state-boundary flag and polling-phase dummies. Phase is converted to a
# factor inside the formula.
sar_model2 <- lagsarlm(todiff19and24 ~ anyrally24 + anyrally24_lag + boundary_constituency + as.factor(Phase), 
                      data = merged_data, listw = W, zero.policy = TRUE)
summary(sar_model2)
## 
## Call:lagsarlm(formula = todiff19and24 ~ anyrally24 + anyrally24_lag + 
##     boundary_constituency + as.factor(Phase), data = merged_data, 
##     listw = W, zero.policy = TRUE)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -20.486495  -1.331912  -0.098319   1.287441  10.197879 
## 
## Type: lag 
## Regions with no neighbours included:
##  38 464 
## Coefficients: (asymptotic standard errors) 
##                         Estimate Std. Error z value  Pr(>|z|)
## (Intercept)             -1.77296    0.38612 -4.5917 4.395e-06
## anyrally24               0.82141    0.22504  3.6501 0.0002621
## anyrally24_lag           0.60157    0.54878  1.0962 0.2729936
## boundary_constituency   -0.31544    0.22508 -1.4015 0.1610695
## as.factor(Phase)Phase 2 -0.35338    0.38448 -0.9191 0.3580435
## as.factor(Phase)Phase 3  1.08594    0.38606  2.8129 0.0049103
## as.factor(Phase)Phase 4  1.36755    0.39188  3.4897 0.0004835
## as.factor(Phase)Phase 5  1.31557    0.47999  2.7408 0.0061283
## as.factor(Phase)Phase 6  0.96700    0.44302  2.1828 0.0290527
## as.factor(Phase)Phase 7  0.77081    0.45069  1.7103 0.0872107
## 
## Rho: 0.62531, LR test value: 165.35, p-value: < 2.22e-16
## Asymptotic standard error: 0.042774
##     z-value: 14.619, p-value: < 2.22e-16
## Wald statistic: 213.72, p-value: < 2.22e-16
## 
## Log likelihood: -1241.348 for lag model
## ML residual variance (sigma squared): 6.279, (sigma: 2.5058)
## Number of observations: 523 
## Number of parameters estimated: 12 
## AIC: 2506.7, (AIC for lm: 2670)
## LM test for residual autocorrelation
## test value: 14.852, p-value: 0.00011627
# Same specification with Incvsdiff19and24 as the outcome and the Rahul rally
# indicator plus its spatial lag as the rally regressors.
# NOTE(review): Incvsdiff19and24 is defined outside this section; presumably
# the INC vote-share change between 2019 and 2024 -- confirm.
# NOTE(review): the summary reports fewer observations than the 543 features,
# which is consistent with rows containing missing values being dropped and W
# being subset (see the subset.nb warning) -- confirm this is intended.
sar_model3 <- lagsarlm(Incvsdiff19and24 ~ rahul24rally + rahulrally24_lag + boundary_constituency + as.factor(Phase), 
                      data = merged_data, listw = W, zero.policy = TRUE)
## Warning in subset.nb(x = nb, subset = subset): subsetting caused increase in
## subgraph count
summary(sar_model3)
## 
## Call:lagsarlm(formula = Incvsdiff19and24 ~ rahul24rally + rahulrally24_lag + 
##     boundary_constituency + as.factor(Phase), data = merged_data, 
##     listw = W, zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -35.5105  -5.2555  -1.1336   5.0321  36.5298 
## 
## Type: lag 
## Regions with no neighbours included:
##  8 61 148 232 267 274 295 405 414 415 449 457 500 530 535 
## Coefficients: (asymptotic standard errors) 
##                           Estimate Std. Error z value Pr(>|z|)
## (Intercept)             -0.0088056  1.7230654 -0.0051  0.99592
## rahul24rally             3.2796188  1.5854824  2.0685  0.03859
## rahulrally24_lag        -0.3339589  3.8305493 -0.0872  0.93053
## boundary_constituency    2.4197308  1.2425813  1.9473  0.05149
## as.factor(Phase)Phase 2  0.1142460  1.9224866  0.0594  0.95261
## as.factor(Phase)Phase 3 -0.0381085  1.9043475 -0.0200  0.98403
## as.factor(Phase)Phase 4  1.4366603  1.9441376  0.7390  0.45992
## as.factor(Phase)Phase 5  5.3177792  2.9525665  1.8011  0.07169
## as.factor(Phase)Phase 6  6.4186365  2.6237264  2.4464  0.01443
## as.factor(Phase)Phase 7 -2.1745285  2.5702228 -0.8460  0.39753
## 
## Rho: 0.41673, LR test value: 35.923, p-value: 2.0531e-09
## Asymptotic standard error: 0.0639
##     z-value: 6.5217, p-value: 6.9514e-11
## Wald statistic: 42.533, p-value: 6.9514e-11
## 
## Log likelihood: -1148.082 for lag model
## ML residual variance (sigma squared): 102.34, (sigma: 10.116)
## Number of observations: 306 
## Number of parameters estimated: 12 
## AIC: 2320.2, (AIC for lm: 2354.1)
## LM test for residual autocorrelation
## test value: 0.94559, p-value: 0.33084
# Same specification with Bjpvsdiff19and24 as the outcome and the Modi rally
# indicator plus its spatial lag as the rally regressors.
# NOTE(review): Bjpvsdiff19and24 is defined outside this section; presumably
# the BJP vote-share change between 2019 and 2024 -- confirm.
# NOTE(review): the summary reports fewer observations than the 543 features,
# which is consistent with rows containing missing values being dropped and W
# being subset (see the subset.nb warning) -- confirm this is intended.
sar_model4 <- lagsarlm(Bjpvsdiff19and24 ~ modi24rally + modirally24_lag + boundary_constituency + as.factor(Phase), 
                      data = merged_data, listw = W, zero.policy = TRUE)
## Warning in subset.nb(x = nb, subset = subset): subsetting caused increase in
## subgraph count
summary(sar_model4)
## 
## Call:lagsarlm(formula = Bjpvsdiff19and24 ~ modi24rally + modirally24_lag + 
##     boundary_constituency + as.factor(Phase), data = merged_data, 
##     listw = W, zero.policy = TRUE)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -35.62910  -3.80240  -0.20617   2.95627  41.89311 
## 
## Type: lag 
## Regions with no neighbours included:
##  415 478 507 
## Coefficients: (asymptotic standard errors) 
##                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)             -1.61075    1.34221 -1.2001  0.23011
## modi24rally             -0.51792    0.73993 -0.7000  0.48396
## modirally24_lag          2.65332    1.88749  1.4057  0.15980
## boundary_constituency   -1.39932    0.74604 -1.8757  0.06070
## as.factor(Phase)Phase 2  1.28442    1.33603  0.9614  0.33637
## as.factor(Phase)Phase 3  1.01297    1.28945  0.7856  0.43211
## as.factor(Phase)Phase 4  3.23499    1.35692  2.3841  0.01712
## as.factor(Phase)Phase 5 -0.96874    1.54762 -0.6260  0.53135
## as.factor(Phase)Phase 6 -0.65616    1.43259 -0.4580  0.64694
## as.factor(Phase)Phase 7 -0.26279    1.50248 -0.1749  0.86116
## 
## Rho: 0.683, LR test value: 189.24, p-value: < 2.22e-16
## Asymptotic standard error: 0.039624
##     z-value: 17.237, p-value: < 2.22e-16
## Wald statistic: 297.11, p-value: < 2.22e-16
## 
## Log likelihood: -1361.977 for lag model
## ML residual variance (sigma squared): 50.831, (sigma: 7.1296)
## Number of observations: 396 
## Number of parameters estimated: 12 
## AIC: 2748, (AIC for lm: 2935.2)
## LM test for residual autocorrelation
## test value: 6.0816, p-value: 0.01366