What is needed:

Congress vote and seats tally in 2024 nationally, and states [gains and losses in comparison to 2019]
Congress performance in a historical setting (seats contested, deposits)
Congress social base – caste and class [using Lokniti-CSDS]
Party performance using constituency-level demographic estimates
Party performance using constituency-level political measures (margin of victory etc)

library(readr)

scrape_row <- read_csv("scrape_row.csv", col_names = TRUE)

## Rows: 8902 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): State_Code, State_Name, idi, Constituency_Name, Constituency_Type,...
## dbl (31): Constituency_No, Position, Vote_Share_Percentage, Margin_Percentag...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data2024 <- scrape_row

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyselect)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidyr)

# Let me first quickly change BJP and INC in the party column name to acronyms

data2024 <- data2024 %>% mutate(Party = case_when(Party == "Indian National Congress" ~ 'INC',
                                                  Party == "Bharatiya Janata Party" ~ 'BJP', TRUE ~ Party))

#let's filter out the INC candidates data
dataINC <- data2024 %>% filter(Party ==  "INC")
# the total number of seats that they won
sum(dataINC$Position == 1)

## [1] 99

#Now let us separate the seat tallies by states.

incvictory <- dataINC %>% filter(Position == 1)

# Let us make a table out of it

INCstatedf <- as.data.frame(table(incvictory$State_Name))

library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

colnamestate <- c('State', 'Seats Won')

colnames(INCstatedf) <- colnamestate

kbl(INCstatedf) %>%
  kable_minimal() %>%kable_styling(full_width = FALSE)

State	Seats Won
Assam	3
Bihar	3
Chandigarh	1
Chhattisgarh	1
Goa	1
Gujarat	1
Haryana	5
Jharkhand	2
Karnataka	9
Kerala	14
Lakshadweep	1
Maharashtra	13
Manipur	2
Meghalaya	1
Nagaland	1
Odisha	1
Puducherry	1
Punjab	7
Rajasthan	8
Tamil_Nadu	9
Telangana	8
Uttar_Pradesh	6
West_Bengal	1

# Most number of victories has come in Kerala, followed by Maharashtra and TN.

# Let us first select some relevant columns 

incvictoryprev <- incvictory %>% select(State_Name, Constituency_Name, Constituency_Type,
                                        Candidate_Name, Vote_Share_Percentage, Margin_Percentage, Margin, Votes,Vote_Share_Percentage, Valid_Votes, winner2019)

# let us now see who was the biggest loser compared to 2019.

inroads <- as.data.frame(table(incvictoryprev$winner2019))

# It can be seen that most of their new victories came against the BJP with 41
# It is followed by the TRS that floated the idea of having a third front 
# The idea was thwarted in all senses and the party suffered heavily in this
# election with the rise of NDA in the state and INC's consolidaiton of its momentum from
# the earlier held state elections.AIMIM won the one seat that was left behind
# after NDA and INDIA won 8 each.
# Three are from DMK but they were in coalition in the state
# so it doesn't count as much 
# Gained two from the Shiv Sena as the officially recognized entity under the name
# is the one under Shinde. Ramtek and Kolhapur.
# In Kolhapur, the runner up from SHS was the previous incumbent. 
# Won one from BSP in Saharanpur.
# Won one from CPM as they didn't form a coalition in this election in the state
# for seat sharing.
# One from an indepednet candidate in Maharashtra from Amravati. The independent
# was anyways a member of the UPA in 2019
# One against JDS in Karnataka. The unpopular Revanna was defated by 
# an INC candidate
# One against JDU in Bihar's Katihar
# One against NCP in Lakshadweep.
# One from NDPP in Nagaland, which is in coalition with the BJP in NDA.
# One in Meghalaya against NPEP, which is in the NDA.
# One in Nagaland against NFP, which is in NDA.
# One in Punjab against SAD, which contested indepedently of any coalition this time.
# So removing the DMK seats and the one independent candidate,
# They have won 58 new seats against the NDA or non-INDIA members. 

# if we make a dataset that only uses other party winning data then we can see how many did the congress lose

dataothers <- data2024 %>% filter(!Party ==  "INC")

dataothers <- dataothers %>% filter(Position == 1)

table(dataothers$winner2019)

## 
##   AAAP   ADAL   ADMK  AIMIM   AITC  AJSUP    BJD    BJP    BSP    CPI    CPM 
##      1      2      1      2     22      1     12    251      9      2      2 
##    DMK    INC    IND   IUML  JD(U)    JMM KEC(M)    LJP    MNF    NCP   RLTP 
##     21     15      2      3     15      1      1      6      1      4      1 
##    RSP    SAD    SHS    SKM     SP    TDP    TRS    VCK  YSRCP 
##      1      1     16      1      5      3      3      1     22

dataothersINCrelation <- dataothers %>% filter(dataothers$winner2019 == "INC")

table(dataothersINCrelation$Party)

## 
##                        Aam Aadmi Party           All India Trinamool Congress 
##                                      1                                      1 
##                                    BJP    Communist Party of India  (Marxist) 
##                                      6                                      1 
##              Dravida Munnetra Kazhagam                            Independent 
##                                      1                                      2 
##                 Jharkhand Mukti Morcha Marumalarchi Dravida Munnetra Kazhagam 
##                                      1                                      1 
##              Voice of the People Party 
##                                      1

# As we can see that INC lost most of its seat that it lost to the BJP in 2024. 
# They lost six to them.
# They lost one to VVP in Tamil Nadu - Vaiko 
# There is also the notable exception of AAP as they have conceded one seat to them 
# while they are in a national alliance in INDIA, they decided to forego an 
# Alliance in punjab as they are the main rivals in the state. 
# Similarly the same happened in Alathur in Kerala. 
# Effectively, Congress lost 8 seats to other parties that it held earlier.
# The two independent candidates are from Punjab. 
# One of them (Sukhbeer Singh Khalsa) is a son of Benat Singh, one of the assassins of Indira Gandhi
# The other is Amritpal singh who has links to Waris Punjab De.
# Neither of them have ties to either of the alliances - INDIA or NDA
# Therefore, it beings the tally of lost seats to 10 for Congress.

Let us make some early models

# will first need to separate the data for just the INC candidates.
congcandi <- data2024 %>% filter(Party == 'INC')
# Let us see the general trends of how these candidates fared in their respective
# constituencies. 
table(congcandi$Position)

## 
##   1   2   3   4   5   9 
##  99 167  55   5   1   1

# INC really likes the silver medal it seems
# Let us now create a dummy for whether they have won or not
congcandi <- congcandi %>% mutate(goldmedal = ifelse(Position == 1,1,0))
# Dummy for if they won the last time
congcandi <- congcandi %>% mutate(former = ifelse(winner2019 == 'INC',1,0))
table(congcandi$goldmedal)

## 
##   0   1 
## 229  99

# Now let us make the first model that checks fow whether there is a difference 
# in how they fared in the reserved constituencies.
model1 <- glm(goldmedal ~ Constituency_Type + former + as.numeric(as.character(ruralPercent)), data = congcandi, family = 'binomial')
summary(model1)

## 
## Call:
## glm(formula = goldmedal ~ Constituency_Type + former + as.numeric(as.character(ruralPercent)), 
##     family = "binomial", data = congcandi)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.508804   0.433212  -3.483 0.000496
## Constituency_TypeSC                     0.772074   0.354581   2.177 0.029449
## Constituency_TypeST                     0.542141   0.418537   1.295 0.195208
## former                                  2.351899   0.378499   6.214 5.17e-10
## as.numeric(as.character(ruralPercent))  0.001408   0.005905   0.238 0.811499
##                                           
## (Intercept)                            ***
## Constituency_TypeSC                    *  
## Constituency_TypeST                       
## former                                 ***
## as.numeric(as.character(ruralPercent))    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 385.89  on 312  degrees of freedom
## Residual deviance: 336.38  on 308  degrees of freedom
##   (15 observations deleted due to missingness)
## AIC: 346.38
## 
## Number of Fisher Scoring iterations: 4

Let us see if i thas done better in the states where it fought mostly on its

own or where it had support from the partners

library(ggplot2)

# Load necessary package
library(ggplot2)
density_data <- density(congcandi$Vote_Share_Percentage, na.rm = TRUE)
density_df <- data.frame(x = density_data$x, y = density_data$y)

# Plot density curve and shade the area under the curve less than 16.67
ggplot(density_df, aes(x = x, y = y)) +
  geom_area(data = subset(density_df, x <= 16.67), aes(y = y), fill = "red", alpha = 0.5) +
  geom_area(data = subset(density_df, x > 16.67), aes(y = y), fill = "blue", alpha = 0.5) +
  geom_vline(xintercept = 16.67, linetype = "dashed", color = "red", size = 1) +
  labs(title = "Density Curve of Vote Share Percentage INC 2024",
       x = "Vote Share Percentage",
       y = "Density") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# the absolute number is as follows

sum(congcandi$Vote_Share_Percentage < 16.7)

## [1] 51

# The party lost its deposit in 51 seats of 328 it contested. (15.55%)
# The figure was 184 out of 421 seats.(43.7%)

# The three big states where there were parties in the INDIA alliance contested
# Separately were West Bengal, Kerala, and Punjab
# Let us create a dummy for these states and see if it affected the performace
# of the INC in these states

congcandi <- congcandi %>% mutate(separate = ifelse(State_Name == c('West_Bengal','Kerala', 'Punjab'),1,0))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `separate = ifelse(...)`.
## Caused by warning in `State_Name == c("West_Bengal", "Kerala", "Punjab")`:
## ! longer object length is not a multiple of shorter object length

model2 <- glm(goldmedal ~ separate + Constituency_Type, data = congcandi, family = 'binomial')
summary(model2)

## 
## Call:
## glm(formula = goldmedal ~ separate + Constituency_Type, family = "binomial", 
##     data = congcandi)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -1.0009     0.1479  -6.770 1.29e-11 ***
## separate              0.8863     0.5540   1.600   0.1096    
## Constituency_TypeSC   0.5406     0.3242   1.667   0.0954 .  
## Constituency_TypeST   0.2670     0.3810   0.701   0.4836    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 401.74  on 327  degrees of freedom
## Residual deviance: 396.33  on 324  degrees of freedom
## AIC: 404.33
## 
## Number of Fisher Scoring iterations: 4

New TCPD dataset with all years

library(readr)
library(dplyr)
library(purrr)

# Read the data
TCPD_GE_All_States_2024_7_18 <- read_csv("TCPD_GE_All_States_2024-7-18.csv")

## Rows: 91669 Columns: 45
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): State_Name, Candidate, Sex, Party, Candidate_Type, Constituency_Na...
## dbl (20): Assembly_No, Constituency_No, Year, month, Poll_No, DelimID, Posit...
## lgl  (6): last_poll, Same_Constituency, Same_Party, Turncoat, Incumbent, Rec...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

compdata <- TCPD_GE_All_States_2024_7_18 %>%
  filter(Year %in% c(1991, 1996, 1998, 1999, 2004, 2009, 2014, 2019))

listdatabyyear <- split(compdata, compdata$Year)
names(listdatabyyear) <- paste0("data", names(listdatabyyear))
list2env(listdatabyyear, envir = .GlobalEnv)

## <environment: R_GlobalEnv>

years <- c(1991, 1996, 1998, 1999, 2004, 2009, 2014, 2019)
parties <- c("INC", "BJP")

# Apply the filter for each year and party
map2(
  rep(years, each = 2),
  rep(parties, times = 8),
  ~ assign(
    paste0("data", .x, .y),
    compdata %>% filter(Year == .x, Party == .y),
    envir = .GlobalEnv
  )
)

## [[1]]
## # A tibble: 492 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          10               1  1991     5       0       3        1
##  2 Andhra_Prad…          10               1  1991     5       0       3        1
##  3 Andhra_Prad…          10               2  1991     5       0       3        1
##  4 Andhra_Prad…          10               3  1991     5       0       3        1
##  5 Andhra_Prad…          10               4  1991     5       0       3        2
##  6 Andhra_Prad…          10               5  1991     5       0       3        1
##  7 Andhra_Prad…          10               6  1991     5       0       3        1
##  8 Andhra_Prad…          10               7  1991     5       0       3        2
##  9 Andhra_Prad…          10               8  1991     5       0       3        2
## 10 Andhra_Prad…          10               9  1991     5       0       3        2
## # ℹ 482 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[2]]
## # A tibble: 472 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          10               1  1991     5       0       3        3
##  2 Andhra_Prad…          10               1  1991     5       0       3        4
##  3 Andhra_Prad…          10               3  1991     5       0       3        3
##  4 Andhra_Prad…          10               4  1991     5       0       3        3
##  5 Andhra_Prad…          10               5  1991     5       0       3        3
##  6 Andhra_Prad…          10               6  1991     5       0       3        3
##  7 Andhra_Prad…          10               7  1991     5       0       3        3
##  8 Andhra_Prad…          10               8  1991     5       0       3        3
##  9 Andhra_Prad…          10               9  1991     5       0       3        3
## 10 Andhra_Prad…          10              10  1991     5       0       3        3
## # ℹ 462 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[3]]
## # A tibble: 532 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          11               1  1996     4       0       3        1
##  2 Andhra_Prad…          11               1  1996     4       0       3        3
##  3 Andhra_Prad…          11               2  1996     4       0       3        1
##  4 Andhra_Prad…          11               3  1996     4       0       3        2
##  5 Andhra_Prad…          11               4  1996     4       0       3        1
##  6 Andhra_Prad…          11               5  1996     4       0       3        2
##  7 Andhra_Prad…          11               6  1996     4       0       3        2
##  8 Andhra_Prad…          11               7  1996     4       0       3        2
##  9 Andhra_Prad…          11               8  1996     4       0       3        1
## 10 Andhra_Prad…          11               9  1996     4       0       3        1
## # ℹ 522 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[4]]
## # A tibble: 473 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          11               1  1996     4       0       3        2
##  2 Andhra_Prad…          11               1  1996     4       0       3        4
##  3 Andhra_Prad…          11               2  1996     4       0       3        4
##  4 Andhra_Prad…          11               3  1996     4       0       3        4
##  5 Andhra_Prad…          11               4  1996     4       0       3        4
##  6 Andhra_Prad…          11               5  1996     4       0       3        4
##  7 Andhra_Prad…          11               6  1996     4       0       3        4
##  8 Andhra_Prad…          11               7  1996     4       0       3        4
##  9 Andhra_Prad…          11               8  1996     4       0       3        4
## 10 Andhra_Prad…          11               9  1996     4       0       3        4
## # ℹ 463 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[5]]
## # A tibble: 479 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          12               1  1998     3       0       3        1
##  2 Andhra_Prad…          12               1  1998     3       0       3        3
##  3 Andhra_Prad…          12               2  1998     3       0       3        2
##  4 Andhra_Prad…          12               3  1998     3       0       3        2
##  5 Andhra_Prad…          12               4  1998     3       0       3        1
##  6 Andhra_Prad…          12               5  1998     3       0       3        2
##  7 Andhra_Prad…          12               6  1998     3       0       3        1
##  8 Andhra_Prad…          12               7  1998     3       0       3        3
##  9 Andhra_Prad…          12               8  1998     3       0       3        3
## 10 Andhra_Prad…          12               9  1998     3       0       3        2
## # ℹ 469 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[6]]
## # A tibble: 389 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          12               1  1998     3       0       3        2
##  2 Andhra_Prad…          12               3  1998     3       0       3        3
##  3 Andhra_Prad…          12               4  1998     3       0       3        3
##  4 Andhra_Prad…          12               6  1998     3       0       3        3
##  5 Andhra_Prad…          12               7  1998     3       0       3        1
##  6 Andhra_Prad…          12               8  1998     3       0       3        1
##  7 Andhra_Prad…          12               9  1998     3       0       3        3
##  8 Andhra_Prad…          12              10  1998     3       0       3        3
##  9 Andhra_Prad…          12              11  1998     3       0       3        3
## 10 Andhra_Prad…          12              12  1998     3       0       3        3
## # ℹ 379 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[7]]
## # A tibble: 453 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          13               1  1999     9       0       3        2
##  2 Andhra_Prad…          13               1  1999     9       0       3        2
##  3 Andhra_Prad…          13               2  1999     9       0       3        2
##  4 Andhra_Prad…          13               3  1999     9       0       3        1
##  5 Andhra_Prad…          13               4  1999     9       0       3        2
##  6 Andhra_Prad…          13               5  1999     9       0       3        2
##  7 Andhra_Prad…          13               6  1999     9       0       3        2
##  8 Andhra_Prad…          13               7  1999     9       0       3        2
##  9 Andhra_Prad…          13               8  1999     9       0       3        2
## 10 Andhra_Prad…          13               9  1999     9       0       3        2
## # ℹ 443 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[8]]
## # A tibble: 339 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          13               1  1999     9       0       3        1
##  2 Andhra_Prad…          13               8  1999     9       0       3        1
##  3 Andhra_Prad…          13              10  1999     9       0       3        1
##  4 Andhra_Prad…          13              20  1999     9       0       3        1
##  5 Andhra_Prad…          13              29  1999     9       0       3        1
##  6 Andhra_Prad…          13              30  1999     9       0       3        2
##  7 Andhra_Prad…          13              31  1999     9       0       3        1
##  8 Andhra_Prad…          13              33  1999     9       0       3        1
##  9 Andhra_Prad…          13              37  1999     9       0       3        1
## 10 Arunachal_P…          13               2  1999     9       0       3        2
## # ℹ 329 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[9]]
## # A tibble: 420 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          14               1  2004     4       0       3        1
##  2 Andhra_Prad…          14               1  2004     4       0       3        2
##  3 Andhra_Prad…          14               2  2004     4       0       3        1
##  4 Andhra_Prad…          14               3  2004     4       0       3        2
##  5 Andhra_Prad…          14               4  2004     4       0       3        1
##  6 Andhra_Prad…          14               6  2004     4       0       3        2
##  7 Andhra_Prad…          14               7  2004     4       0       3        1
##  8 Andhra_Prad…          14               8  2004     4       0       3        1
##  9 Andhra_Prad…          14               9  2004     4       0       3        1
## 10 Andhra_Prad…          14              10  2004     4       0       3        1
## # ℹ 410 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[10]]
## # A tibble: 367 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          14               1  2004     4       0       3        2
##  2 Andhra_Prad…          14               8  2004     4       0       3        2
##  3 Andhra_Prad…          14              10  2004     4       0       3        2
##  4 Andhra_Prad…          14              19  2004     4       0       3        2
##  5 Andhra_Prad…          14              20  2004     4       0       3        2
##  6 Andhra_Prad…          14              30  2004     4       0       3        2
##  7 Andhra_Prad…          14              31  2004     4       0       3        2
##  8 Andhra_Prad…          14              33  2004     4       0       3        2
##  9 Andhra_Prad…          14              37  2004     4       0       3        2
## 10 Andhra_Prad…          14              41  2004     4       0       3        2
## # ℹ 357 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[11]]
## # A tibble: 441 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          15               1  2009     4       0       4        2
##  2 Andhra_Prad…          15               1  2009     4       0       4        2
##  3 Andhra_Prad…          15               2  2009     4       0       4        1
##  4 Andhra_Prad…          15               3  2009     4       0       4        1
##  5 Andhra_Prad…          15               4  2009     4       0       4        1
##  6 Andhra_Prad…          15               5  2009     4       0       4        1
##  7 Andhra_Prad…          15               6  2009     4       0       4        2
##  8 Andhra_Prad…          15               7  2009     4       0       4        1
##  9 Andhra_Prad…          15               8  2009     4       0       4        1
## 10 Andhra_Prad…          15               9  2009     4       0       4        3
## # ℹ 431 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[12]]
## # A tibble: 434 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          15               1  2009     4       0       4        1
##  2 Andhra_Prad…          15               1  2009     4       0       4        4
##  3 Andhra_Prad…          15               2  2009     4       0       4        4
##  4 Andhra_Prad…          15               3  2009     4       0       4        4
##  5 Andhra_Prad…          15               4  2009     4       0       4        4
##  6 Andhra_Prad…          15               5  2009     4       0       4        4
##  7 Andhra_Prad…          15               6  2009     4       0       4        4
##  8 Andhra_Prad…          15               7  2009     4       0       4        4
##  9 Andhra_Prad…          15               8  2009     4       0       4        2
## 10 Andhra_Prad…          15               9  2009     4       0       4        4
## # ℹ 424 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[13]]
## # A tibble: 468 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          16               1  2014     4       0       4        2
##  2 Andhra_Prad…          16               1  2014     4       0       4        2
##  3 Andhra_Prad…          16               2  2014     4       0       4        2
##  4 Andhra_Prad…          16               3  2014     4       0       4        2
##  5 Andhra_Prad…          16               4  2014     4       0       4        2
##  6 Andhra_Prad…          16               5  2014     4       0       4        2
##  7 Andhra_Prad…          16               6  2014     4       0       4        2
##  8 Andhra_Prad…          16               7  2014     4       0       4        3
##  9 Andhra_Prad…          16               8  2014     4       0       4        2
## 10 Andhra_Prad…          16               9  2014     4       0       4        3
## # ℹ 458 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[14]]
## # A tibble: 433 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          16               1  2014     4       0       4        1
##  2 Andhra_Prad…          16               3  2014     4       0       4        3
##  3 Andhra_Prad…          16               4  2014     4       0       4        3
##  4 Andhra_Prad…          16               6  2014     4       0       4        3
##  5 Andhra_Prad…          16               8  2014     4       0       4        1
##  6 Andhra_Prad…          16               9  2014     4       0       4        2
##  7 Andhra_Prad…          16              11  2014     4       0       4        3
##  8 Andhra_Prad…          16              14  2014     4       0       4        3
##  9 Andhra_Prad…          16              15  2014     4       0       4        3
## 10 Andhra_Prad…          16              21  2014     4       0       4        1
## # ℹ 423 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[15]]
## # A tibble: 422 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          17               1  2019     4       0       4        1
##  2 Andhra_Prad…          17               1  2019     4       0       4        6
##  3 Andhra_Prad…          17               2  2019     4       0       4        5
##  4 Andhra_Prad…          17               3  2019     4       0       4        5
##  5 Andhra_Prad…          17               4  2019     4       0       4        6
##  6 Andhra_Prad…          17               5  2019     4       0       4        6
##  7 Andhra_Prad…          17               6  2019     4       0       4        6
##  8 Andhra_Prad…          17               7  2019     4       0       4        6
##  9 Andhra_Prad…          17               8  2019     4       0       4        5
## 10 Andhra_Prad…          17               9  2019     4       0       4        4
## # ℹ 412 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …
## 
## [[16]]
## # A tibble: 437 × 45
##    State_Name   Assembly_No Constituency_No  Year month Poll_No DelimID Position
##    <chr>              <dbl>           <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
##  1 Andaman_&_N…          17               1  2019     4       0       4        2
##  2 Andhra_Prad…          17               1  2019     4       0       4        5
##  3 Andhra_Prad…          17               2  2019     4       0       4        6
##  4 Andhra_Prad…          17               3  2019     4       0       4        6
##  5 Andhra_Prad…          17               4  2019     4       0       4        4
##  6 Andhra_Prad…          17               5  2019     4       0       4        5
##  7 Andhra_Prad…          17               6  2019     4       0       4        5
##  8 Andhra_Prad…          17               7  2019     4       0       4        5
##  9 Andhra_Prad…          17               8  2019     4       0       4        6
## 10 Andhra_Prad…          17               9  2019     4       0       4        5
## # ℹ 427 more rows
## # ℹ 37 more variables: Candidate <chr>, Sex <chr>, Party <chr>, Votes <dbl>,
## #   Candidate_Type <chr>, Valid_Votes <dbl>, Electors <dbl>,
## #   Constituency_Name <chr>, Constituency_Type <chr>, Sub_Region <chr>,
## #   N_Cand <dbl>, Turnout_Percentage <dbl>, Vote_Share_Percentage <dbl>,
## #   Deposit_Lost <chr>, Margin <dbl>, Margin_Percentage <dbl>, ENOP <dbl>,
## #   pid <chr>, Party_Type_TCPD <chr>, Party_ID <dbl>, last_poll <lgl>, …

# Remove byelection seats for INC1991
data1991INC <- head(data1991INC, n = nrow(data1991INC) - 5)
 
# For 1991
mobiINC1991 <- sum(data1991INC$Votes) / sum(data1991INC$Electors)
mobiBJP1991 <- sum(data1991BJP$Votes) / sum(data1991BJP$Electors)

# For 1996
mobiINC1996 <- sum(data1996INC$Votes) / sum(data1996INC$Electors)
mobiBJP1996 <- sum(data1996BJP$Votes) / sum(data1996BJP$Electors)

# For 1998
mobiINC1998 <- sum(data1998INC$Votes) / sum(data1998INC$Electors)
mobiBJP1998 <- sum(data1998BJP$Votes) / sum(data1998BJP$Electors)

# For 1999
mobiINC1999 <- sum(data1999INC$Votes) / sum(data1999INC$Electors)
mobiBJP1999 <- sum(data1999BJP$Votes) / sum(data1999BJP$Electors)
 
# For 2004
mobiINC2004 <- sum(data2004INC$Votes)/sum(data2004INC$Electors)
mobiBJP2004 <- sum(data2004BJP$Votes)/sum(data2004BJP$Electors)

#For 2009
mobiINC2009 <- sum(data2009INC$Votes)/sum(data2009INC$Electors)
mobiBJP2009 <- sum(data2009BJP$Votes)/sum(data2009BJP$Electors)

#For 2014
mobiINC2014 <- sum(data2014INC$Votes)/sum(data2014INC$Electors)
mobiBJP2014 <- sum(data2014BJP$Votes)/sum(data2014BJP$Electors)

#For 2019
mobiINC2019 <- sum(data2019INC$Votes)/sum(data2019INC$Electors)
mobiBJP2019 <- sum(data2019BJP$Votes)/sum(data2019BJP$Electors)


data2024BJP <- data2024 %>% filter(Party == 'BJP')
data2024INC <- data2024 %>% filter(Party == 'INC')

mobiINC2024 <- sum(data2024INC$Votes)/sum(data2024INC$electors_2024)
mobiBJP2024 <- sum(data2024BJP$Votes, na.rm = TRUE)/sum(data2024BJP$electors_2024, na.rm = TRUE)


library(purrr)

if (exists("data1991INC") && exists("data1991BJP") && 
    exists("data1996INC") && exists("data1996BJP") &&
    exists("data1998INC") && exists("data1998BJP") &&
    exists("data1999INC") && exists("data1999BJP") &&
    exists("data2004INC") && exists("data2004BJP") &&
    exists("data2009INC") && exists("data2009BJP") &&
    exists("data2014INC") && exists("data2014BJP") &&
    exists("data2019INC") && exists("data2019BJP")) 
  
datasets <- list(
  data1991INC = data1991INC, data1991BJP = data1991BJP,
  data1996INC = data1996INC, data1996BJP = data1996BJP,
  data1998INC = data1998INC, data1998BJP = data1998BJP,
  data1999INC = data1999INC, data1999BJP = data1999BJP,
  data2004INC = data2004INC, data2004BJP = data2004BJP,
  data2009INC = data2009INC, data2009BJP = data2009BJP,
  data2014INC = data2014INC, data2014BJP = data2014BJP,
  data2019INC = data2019INC, data2019BJP = data2019BJP
)

missing_datasets <- names(datasets)[!sapply(datasets, function(x) !is.null(x))]
if (length(missing_datasets) > 0) {
  stop(paste("The following datasets are missing:", paste(missing_datasets, collapse = ", ")))
}

mobi_values <- map2(
  names(datasets),
  rep(c("INC", "BJP"), length.out = length(datasets)),
  ~ sum(datasets[[.x]]$Votes) / sum(datasets[[.x]]$Electors)
)

# Let us create individual mobilization score for each district. 

datasets <- datasets %>% map(~ mutate(.x, mobi = Votes/Electors))

#update it in the env
list2env(datasets, envir = .GlobalEnv)

## <environment: R_GlobalEnv>

# now if we find the values by average instead of the one used above:
calc_mobi_avg <- function(df) {
  sum(df$mobi) / nrow(df)
}

mobi_avg <- datasets %>% map(calc_mobi_avg)

print(mobi_avg)

## $data1991INC
## [1] 0.215566
## 
## $data1991BJP
## [1] 0.144637
## 
## $data1996INC
## [1] 0.1735236
## 
## $data1996BJP
## [1] 0.1309606
## 
## $data1998INC
## [1] 0.1829057
## 
## $data1998BJP
## [1] 0.2155257
## 
## $data1999INC
## [1] 0.2028397
## 
## $data1999BJP
## [1] 0.2216685
## 
## $data2004INC
## [1] 0.2047149
## 
## $data2004BJP
## [1] 0.1871222
## 
## $data2009INC
## [1] 0.2102997
## 
## $data2009BJP
## [1] 0.1346299
## 
## $data2014INC
## [1] 0.1562795
## 
## $data2014BJP
## [1] 0.2556924
## 
## $data2019INC
## [1] 0.1733923
## 
## $data2019BJP
## [1] 0.3065449

# let us calculate the winning differential over the time:

list2env(lapply(listdatabyyear, function(df) {
     df %>%
         group_by(Constituency_Name) %>%
         slice_max(Votes, n = 2) %>%
         ungroup()
 }), envir = .GlobalEnv)

## <environment: R_GlobalEnv>

 # the fucntion can be as follows. It first removes every candidate except for
# the top two and then finds the difference. 
 calculate_avg_differential <- function(df) {
     df %>%
         group_by(Constituency_Name) %>%
         arrange(desc(Vote_Share_Percentage)) %>%
         slice(1:2) %>%
         summarise(difference = Vote_Share_Percentage[1] - Vote_Share_Percentage[2], .groups = 'drop') %>%
         summarise(avg_differential = mean(difference, na.rm = TRUE))
 }
 
avg_differentials <- lapply(listdatabyyear, calculate_avg_differential)
 
names(avg_differentials) <- names(listdatabyyear)
 
avg_differentials_values <- sapply(avg_differentials, function(df) df$avg_differential)
 
avg_differentials_values

##  data1991  data1996  data1998  data1999  data2004  data2009  data2014  data2019 
## 14.126583 12.224019 10.283142 10.213296 12.171204  9.702722 15.007172 17.183050

# Function to calculate the median winning differential for a dataset
calc_median_differential <- function(df) {
  df %>%
    group_by(Constituency_Name) %>%
    arrange(desc(Vote_Share_Percentage)) %>%
    slice(1:2) %>%
    summarise(difference = Vote_Share_Percentage[1] - Vote_Share_Percentage[2], .groups = 'drop') %>%
    summarise(median_differential = median(difference, na.rm = TRUE))
}

median_differentials <- lapply(listdatabyyear, calc_median_differential)

names(median_differentials) <- names(listdatabyyear)

median_differentials_values <- sapply(median_differentials, function(df) df$median_differential)

median_differentials_values

## data1991 data1996 data1998 data1999 data2004 data2009 data2014 data2019 
##   10.580    9.370    7.630    7.890    9.940    6.945   13.540   15.060

average_differential2024 <- data2024 %>%
  arrange(Constituency_Name, Position) %>%              
  group_by(Constituency_Name) %>%                         
  summarise(differential = {
    if(n() >= 2) {
      first_candidate <- Vote_Share_Percentage[1]
      second_candidate <- Vote_Share_Percentage[2]
      first_candidate - second_candidate
    } else {
      NA
    }
  }) %>%
  summarise(average_differential = mean(differential, na.rm = TRUE))  

print(average_differential2024)

## # A tibble: 1 × 1
##   average_differential
##                  <dbl>
## 1                 13.5

# Calculating the median differential for 2024
median_differential2024 <- data2024 %>%
  arrange(Constituency_Name, Position) %>%                
  group_by(Constituency_Name) %>%                        
  summarise(differential = {
    if(n() >= 2) {
      first_candidate <- Vote_Share_Percentage[1]
      second_candidate <- Vote_Share_Percentage[2]
      first_candidate - second_candidate
    } else {
      NA 
    }
  }) %>%
  summarise(median_differential = median(differential, na.rm = TRUE))  # Compute the median differential

print(median_differential2024)

## # A tibble: 1 × 1
##   median_differential
##                 <dbl>
## 1                10.3

Now let us find the performance of the top three parties

# Function to calculate the top three parties by number of seats won in a single dataset
 calc_top_three_parties <- function(df) {
     df %>%
         # Ensure 'Votes' is numeric
         mutate(Votes = as.numeric(Votes)) %>%
         # Find the winner in each constituency
         group_by(Constituency_Name) %>%
         slice_max(order_by = Votes, n = 1, with_ties = FALSE) %>%
         ungroup() %>%
         # Count the number of seats won by each party
         count(Party, sort = TRUE) %>%
         # Get the top three parties
         slice_max(order_by = n, n = 3) %>%
         ungroup()
 }
 
 # Apply the function to each dataset in the list and add a 'Year' column
 top_parties_by_year <- lapply(seq_along(listdatabyyear), function(i) {
     df <- calc_top_three_parties(listdatabyyear[[i]])
     df$Year <- names(listdatabyyear)[i]
     return(df)
 })
 
 # Combine all results into a single dataframe
 combined_top_parties <- bind_rows(top_parties_by_year)
 
 # Print the combined dataframe
 print(combined_top_parties)

## # A tibble: 24 × 3
##    Party     n Year    
##    <chr> <int> <chr>   
##  1 INC     232 data1991
##  2 BJP     118 data1991
##  3 JD       58 data1991
##  4 BJP     159 data1996
##  5 INC     139 data1996
##  6 JD       45 data1996
##  7 BJP     180 data1998
##  8 INC     141 data1998
##  9 CPM      32 data1998
## 10 BJP     182 data1999
## # ℹ 14 more rows

pivoted_top_parties <- combined_top_parties %>%
     pivot_wider(names_from = Party, values_from = n, values_fill = list(n = 0))

# will have to look up the vote share online as the data is doing weird things

write.csv(pivoted_top_parties, file = "pivot1991-2019.csv")

write_excel_csv(pivoted_top_parties, file = "pivoted_top_parties.csv")

congpaperscript

Pawas

2024-07-08

What is needed:

Let us make some early models

Let us see if i thas done better in the states where it fought mostly on its

own or where it had support from the partners

New TCPD dataset with all years

Now let us find the performance of the top three parties