Let us make some maps
## Reading layer `merged_data_gis' from data source
## `/Users/pawaspratikshit/Desktop/Congress Paper/merged_data_gis.gpkg'
## using driver `GPKG'
## Simple feature collection with 543 features and 85 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 68.09348 ymin: 6.762522 xmax: 97.4115 ymax: 37.07761
## Geodetic CRS: WGS 84






# Models
# A simple model first: spatial lag (SAR) model of the 2019-2024 turnout change
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.3.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.3.3
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(sf)
# Make sure we have a valid sf object before doing any geometry work.
if (!inherits(merged_data, "sf")) {
  merged_data <- st_as_sf(merged_data)
}
merged_data <- st_make_valid(merged_data)

# Row-standardised spatial weights from the 6 nearest constituency centroids.
# knearneigh() measures plain Euclidean distance on a coordinate matrix unless
# told the coordinates are longitude/latitude, so pass that information along
# (great-circle distances are used when longlat = TRUE). isTRUE() keeps the
# planar behaviour when the layer has no CRS (st_is_longlat() returns NA).
# NOTE(review): for a geographic CRS this can change the neighbour sets, so
# previously rendered model output should be regenerated.
coords <- st_centroid(st_geometry(merged_data))
coords_matrix <- st_coordinates(coords)
coords_are_longlat <- isTRUE(st_is_longlat(merged_data))
nb <- knn2nb(knearneigh(coords_matrix, k = 6, longlat = coords_are_longlat))
W <- nb2listw(nb, style = "W", zero.policy = TRUE)

# Rally indicators: a missing value is treated as "no rally".
# The NAs are filled before combining the two indicators so the combined flag
# never depends on how `|` propagates NA.
merged_data$rahul24rally[is.na(merged_data$rahul24rally)] <- 0
merged_data$modi24rally[is.na(merged_data$modi24rally)] <- 0

# 1 if either leader held a rally in the constituency, 0 otherwise.
# Both sides are compared explicitly: `a | b == 1` parses as `a | (b == 1)`.
merged_data <- merged_data %>%
  mutate(anyrally24 = as.numeric(rahul24rally == 1 | modi24rally == 1))

# Spatial lags: with style = "W" these are the neighbours' weighted averages.
merged_data$anyrally24_lag <- lag.listw(
  W, merged_data$anyrally24,
  zero.policy = TRUE
)
merged_data$rahulrally24_lag <- lag.listw(
  W, merged_data$rahul24rally,
  zero.policy = TRUE
)
merged_data$modirally24_lag <- lag.listw(
  W, merged_data$modi24rally,
  zero.policy = TRUE
)

# Outcome: change in turnout between the 2019 and 2024 elections.
merged_data <- merged_data %>%
  mutate(todiff19and24 = turnout_2024 - turnout2019)

# Spatial lag model of the turnout change on own-constituency rally exposure
# and the neighbours' rally exposure.
sar_model <- lagsarlm(
  formula = todiff19and24 ~ anyrally24 + anyrally24_lag,
  data = merged_data,
  listw = W,
  zero.policy = TRUE
)
summary(sar_model)
##
## Call:lagsarlm(formula = todiff19and24 ~ anyrally24 + anyrally24_lag,
## data = merged_data, listw = W, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.064752 -1.308721 0.008247 1.353958 9.394872
##
## Type: lag
## Regions with no neighbours included:
## 38 464
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.21014 0.28629 -4.2270 2.368e-05
## anyrally24 0.83823 0.22746 3.6852 0.0002286
## anyrally24_lag 0.79440 0.53379 1.4882 0.1366884
##
## Rho: 0.69705, LR test value: 230.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.038656
## z-value: 18.032, p-value: < 2.22e-16
## Wald statistic: 325.16, p-value: < 2.22e-16
##
## Log likelihood: -1258.1 for lag model
## ML residual variance (sigma squared): 6.5421, (sigma: 2.5578)
## Number of observations: 523
## Number of parameters estimated: 5
## AIC: 2526.2, (AIC for lm: 2754.7)
## LM test for residual autocorrelation
## test value: 38.196, p-value: 6.3983e-10
# Queen-contiguity neighbours of every constituency polygon.
# This reuses the name `nb`; the weights list `W` was already built from the
# earlier neighbour object and is not modified here.
nb <- poly2nb(merged_data, queen = TRUE)
## Warning in poly2nb(merged_data, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(merged_data, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
merged_data$ST_NAME <- as.character(merged_data$ST_NAME)

# boundary_constituency is 1 when at least one contiguous neighbour lies in a
# different state (ST_NAME), 0 otherwise. vapply() + seq_along() keep the
# result a numeric vector of length(nb) even for empty input, and
# as.numeric(any(...)) replaces a scalar ifelse().
# An observation whose neighbour entry selects no rows yields 0, because
# any() of an empty comparison is FALSE.
merged_data$boundary_constituency <- vapply(
  seq_along(nb),
  function(i) {
    neighbor_states <- unique(merged_data$ST_NAME[nb[[i]]])
    as.numeric(any(neighbor_states != merged_data$ST_NAME[i]))
  },
  numeric(1)
)
# Label each constituency with its 2024 polling phase, keyed on the polling
# date in `doe2024`. Dates that match none of the seven listed values (and
# missing dates) get NA.
merged_data <- mutate(
  merged_data,
  Phase = case_when(
    doe2024 == "2024-04-19" ~ "Phase 1",
    doe2024 == "2024-04-26" ~ "Phase 2",
    doe2024 == "2024-05-07" ~ "Phase 3",
    doe2024 == "2024-05-13" ~ "Phase 4",
    doe2024 == "2024-05-20" ~ "Phase 5",
    doe2024 == "2024-05-25" ~ "Phase 6",
    doe2024 == "2024-06-01" ~ "Phase 7",
    TRUE ~ NA_character_
  )
)
# Map of the constituencies, filled by the boundary_constituency flag
# (gray = 0, red = 1).
ggplot(
  data = merged_data,
  mapping = aes(fill = as.factor(boundary_constituency))
) +
  geom_sf() +
  scale_fill_manual(values = c("0" = "gray", "1" = "red")) +
  labs(title = "Boundary Constituencies in India") +
  theme_minimal()

# Spatial lag model of the turnout change, adding the boundary_constituency
# flag and polling-phase dummies to the rally terms.
sar_model2 <- lagsarlm(
  formula = todiff19and24 ~ anyrally24 + anyrally24_lag +
    boundary_constituency + as.factor(Phase),
  data = merged_data,
  listw = W,
  zero.policy = TRUE
)
summary(sar_model2)
##
## Call:lagsarlm(formula = todiff19and24 ~ anyrally24 + anyrally24_lag +
## boundary_constituency + as.factor(Phase), data = merged_data,
## listw = W, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.486495 -1.331912 -0.098319 1.287441 10.197879
##
## Type: lag
## Regions with no neighbours included:
## 38 464
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.77296 0.38612 -4.5917 4.395e-06
## anyrally24 0.82141 0.22504 3.6501 0.0002621
## anyrally24_lag 0.60157 0.54878 1.0962 0.2729936
## boundary_constituency -0.31544 0.22508 -1.4015 0.1610695
## as.factor(Phase)Phase 2 -0.35338 0.38448 -0.9191 0.3580435
## as.factor(Phase)Phase 3 1.08594 0.38606 2.8129 0.0049103
## as.factor(Phase)Phase 4 1.36755 0.39188 3.4897 0.0004835
## as.factor(Phase)Phase 5 1.31557 0.47999 2.7408 0.0061283
## as.factor(Phase)Phase 6 0.96700 0.44302 2.1828 0.0290527
## as.factor(Phase)Phase 7 0.77081 0.45069 1.7103 0.0872107
##
## Rho: 0.62531, LR test value: 165.35, p-value: < 2.22e-16
## Asymptotic standard error: 0.042774
## z-value: 14.619, p-value: < 2.22e-16
## Wald statistic: 213.72, p-value: < 2.22e-16
##
## Log likelihood: -1241.348 for lag model
## ML residual variance (sigma squared): 6.279, (sigma: 2.5058)
## Number of observations: 523
## Number of parameters estimated: 12
## AIC: 2506.7, (AIC for lm: 2670)
## LM test for residual autocorrelation
## test value: 14.852, p-value: 0.00011627
# Spatial lag model of Incvsdiff19and24 on the rahul24rally indicator and its
# spatial lag, with the boundary_constituency flag and polling-phase dummies.
sar_model3 <- lagsarlm(
  formula = Incvsdiff19and24 ~ rahul24rally + rahulrally24_lag +
    boundary_constituency + as.factor(Phase),
  data = merged_data,
  listw = W,
  zero.policy = TRUE
)
## Warning in subset.nb(x = nb, subset = subset): subsetting caused increase in
## subgraph count
summary(sar_model3)
##
## Call:lagsarlm(formula = Incvsdiff19and24 ~ rahul24rally + rahulrally24_lag +
## boundary_constituency + as.factor(Phase), data = merged_data,
## listw = W, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.5105 -5.2555 -1.1336 5.0321 36.5298
##
## Type: lag
## Regions with no neighbours included:
## 8 61 148 232 267 274 295 405 414 415 449 457 500 530 535
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0088056 1.7230654 -0.0051 0.99592
## rahul24rally 3.2796188 1.5854824 2.0685 0.03859
## rahulrally24_lag -0.3339589 3.8305493 -0.0872 0.93053
## boundary_constituency 2.4197308 1.2425813 1.9473 0.05149
## as.factor(Phase)Phase 2 0.1142460 1.9224866 0.0594 0.95261
## as.factor(Phase)Phase 3 -0.0381085 1.9043475 -0.0200 0.98403
## as.factor(Phase)Phase 4 1.4366603 1.9441376 0.7390 0.45992
## as.factor(Phase)Phase 5 5.3177792 2.9525665 1.8011 0.07169
## as.factor(Phase)Phase 6 6.4186365 2.6237264 2.4464 0.01443
## as.factor(Phase)Phase 7 -2.1745285 2.5702228 -0.8460 0.39753
##
## Rho: 0.41673, LR test value: 35.923, p-value: 2.0531e-09
## Asymptotic standard error: 0.0639
## z-value: 6.5217, p-value: 6.9514e-11
## Wald statistic: 42.533, p-value: 6.9514e-11
##
## Log likelihood: -1148.082 for lag model
## ML residual variance (sigma squared): 102.34, (sigma: 10.116)
## Number of observations: 306
## Number of parameters estimated: 12
## AIC: 2320.2, (AIC for lm: 2354.1)
## LM test for residual autocorrelation
## test value: 0.94559, p-value: 0.33084
# Spatial lag model of Bjpvsdiff19and24 on the modi24rally indicator and its
# spatial lag, with the boundary_constituency flag and polling-phase dummies.
sar_model4 <- lagsarlm(
  formula = Bjpvsdiff19and24 ~ modi24rally + modirally24_lag +
    boundary_constituency + as.factor(Phase),
  data = merged_data,
  listw = W,
  zero.policy = TRUE
)
## Warning in subset.nb(x = nb, subset = subset): subsetting caused increase in
## subgraph count
summary(sar_model4)
##
## Call:lagsarlm(formula = Bjpvsdiff19and24 ~ modi24rally + modirally24_lag +
## boundary_constituency + as.factor(Phase), data = merged_data,
## listw = W, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.62910 -3.80240 -0.20617 2.95627 41.89311
##
## Type: lag
## Regions with no neighbours included:
## 415 478 507
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.61075 1.34221 -1.2001 0.23011
## modi24rally -0.51792 0.73993 -0.7000 0.48396
## modirally24_lag 2.65332 1.88749 1.4057 0.15980
## boundary_constituency -1.39932 0.74604 -1.8757 0.06070
## as.factor(Phase)Phase 2 1.28442 1.33603 0.9614 0.33637
## as.factor(Phase)Phase 3 1.01297 1.28945 0.7856 0.43211
## as.factor(Phase)Phase 4 3.23499 1.35692 2.3841 0.01712
## as.factor(Phase)Phase 5 -0.96874 1.54762 -0.6260 0.53135
## as.factor(Phase)Phase 6 -0.65616 1.43259 -0.4580 0.64694
## as.factor(Phase)Phase 7 -0.26279 1.50248 -0.1749 0.86116
##
## Rho: 0.683, LR test value: 189.24, p-value: < 2.22e-16
## Asymptotic standard error: 0.039624
## z-value: 17.237, p-value: < 2.22e-16
## Wald statistic: 297.11, p-value: < 2.22e-16
##
## Log likelihood: -1361.977 for lag model
## ML residual variance (sigma squared): 50.831, (sigma: 7.1296)
## Number of observations: 396
## Number of parameters estimated: 12
## AIC: 2748, (AIC for lm: 2935.2)
## LM test for residual autocorrelation
## test value: 6.0816, p-value: 0.01366