Dataset

Dataset: Loksabha 2019 Candidates General Information. (https://www.kaggle.com/datasets/themlphdstudent/lok-sabha-election-candidate-list-2004-to-2019)

# Importing required libraries

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggrepel)

Loading our dataset

# Read the Lok Sabha 2019 candidate table into a data frame.
# NOTE(review): absolute, machine-specific Windows path - this only runs where
# exactly this file exists.
data <- read.csv(
  file = "C:\\Users\\bhush\\Downloads\\Coursework\\I 590 INTRO TO R\\datasets\\data_final\\LokSabha2019_xl.csv"
)

Calculating Equity of each candidate and storing in a new column:

Equity is what remains once liabilities are subtracted from assets: “Equity = Assets - Liabilities” (More information on: Link)

# Keep the imported table under a second name for the transformations below.
data_copy <- data

# Equity of every candidate: total assets minus liabilities.
# Columns are referenced by bare name inside mutate() rather than through
# `data_copy$...`, so the expression is evaluated against the rows flowing
# through the pipe (and stays correct if the frame is filtered or grouped
# upstream).
data_copy_ <- data_copy |>
  mutate(equity = Total.Assets - Liabilities)

Age Deviation for each party’s candidates

# Age deviation: how far each candidate's age lies from the mean age of the
# candidates that share the same Party and the same Winner value.
data_copy2 <- data_copy |>
  group_by(Party, Winner) |>
  # na.rm = TRUE: a single missing Age must not turn the group mean (and with
  # it every deviation in that group) into NA.
  # as.integer() truncates the deviation toward zero.
  mutate(Age_deviation = as.integer(Age - mean(Age, na.rm = TRUE))) |>
  ungroup() |>
  select(Candidate, Party, Age, Age_deviation, Winner)

Ranking based on Equity amount (for assessing the candidate’s wealth)

# Equity (Total.Assets - Liabilities) per candidate plus a wealth rank in which
# rank 1 is the largest equity. rank() keeps its default tie handling, so tied
# candidates share an averaged (possibly fractional) rank.
# Bare column names are used inside mutate() instead of `data_copy$...` so the
# expressions always refer to the rows flowing through the pipe.
data_copy3 <- data_copy |>
  mutate(
    Equity_amt = Total.Assets - Liabilities,
    real_wealth_rank = rank(desc(Equity_amt))
  ) |>
  # Wealthiest candidates first.
  arrange(real_wealth_rank)

Calculating Male to Female Ratio for each constituency.

# One row per constituency with the number of male ("M") and female ("F")
# candidates and their ratio.
# The ratio divides by (Female_Count + 1), so it stays finite for a
# constituency without any female candidate; it is therefore smaller than the
# plain male/female quotient.
data_copy4 <- data_copy |>
  group_by(Constituency) |>
  summarise(
    Male_Count = sum(Gender == "M"),
    Female_Count = sum(Gender == "F"),
    .groups = "drop"
  ) |>
  mutate(Male_to_Female_Ratio = Male_Count / (Female_Count + 1))

# Jittered scatter of the male-to-female ratio (x) per constituency (y).
# Constituencies whose ratio exceeds 24 get a blue, repelled text label; the
# y-axis tick labels are hidden.
ggplot(data_copy4, aes(x = Male_to_Female_Ratio, y = Constituency)) +
  geom_jitter() +
  geom_text_repel(
    data = filter(data_copy4, Male_to_Female_Ratio > 24),
    mapping = aes(label = Constituency),
    color = "Blue"
  ) +
  theme(axis.text.y = element_blank())

We can observe an outlier; let’s find out more about it:

# Keep only the constituencies whose ratio exceeds 75 and display them.
outlier <- filter(data_copy4, Male_to_Female_Ratio > 75)

print(outlier)

## # A tibble: 1 × 4
##   Constituency Male_Count Female_Count Male_to_Female_Ratio
##   <chr>             <int>        <int>                <dbl>
## 1 Nizamabad           182            1                   91

Plotting the equities for the winning candidates compared to the mean equity amount

# Mean equity over all candidates, ignoring missing values.
mean_equity_amt <- mean(data_copy3[["Equity_amt"]], na.rm = TRUE)

# Tag every candidate relative to that mean; an equity exactly equal to the
# mean falls into "Below Mean".
data_copy3 <- data_copy3 |>
  mutate(
    color_ = ifelse(Equity_amt > mean_equity_amt, "Above Mean", "Below Mean")
  )

# Jitter plot of equity by gender, restricted to rows with Winner == 1 and
# coloured by the above/below-mean tag.
ggplot(
  filter(data_copy3, Winner == 1),
  aes(x = Equity_amt, y = Gender, color = color_)
) +
  geom_jitter()

Finding correlation among various numeric factors in our data set.

A positive correlation coefficient indicates that ‘x’ tends to increase as ‘y’ increases; a negative coefficient indicates that ‘x’ tends to decrease as ‘y’ increases.

# Correlation between age and number of criminal cases (complete pairs only).
with(data_copy3, cor(Age, Criminal.Cases, use = "complete.obs"))

## [1] 0.02234815

# Correlation between number of criminal cases and the Winner flag
# (complete pairs only).
with(data_copy3, cor(Criminal.Cases, Winner, use = "complete.obs"))

## [1] -0.02815683

# Correlation between age and total assets (complete pairs only).
with(data_copy3, cor(Age, Total.Assets, use = "complete.obs"))

## [1] 0.111774

# Correlation between age and equity (complete pairs only).
with(data_copy3, cor(Age, Equity_amt, use = "complete.obs"))

## [1] 0.1083645

Loksabha_data_dive_5

2023-10-01

Dataset

Loading our dataset

Calculating Equity of each candidate and storing in a new column:

Age Deviation for each party’s candidates

Ranking based on Equity amount (for assessing the candidate’s wealth)

Calculating Male to Female Ratio for each constituency.

We can observe an outlier; let’s find out more about it:

Plotting the equities for the winning candidates compared to the mean equity amount

Finding correlation among various numeric factors in our data set.

A positive correlation coefficient indicates that ‘x’ tends to increase as ‘y’ increases; a negative coefficient indicates that ‘x’ tends to decrease as ‘y’ increases.