Dataset: Loksabha 2019 Candidates General Information. (https://www.kaggle.com/datasets/themlphdstudent/lok-sabha-election-candidate-list-2004-to-2019)
# Importing required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggrepel)
# Read the Lok Sabha 2019 candidate CSV from its local path into `data`.
data <- read.csv(
  file = 'C:\\Users\\bhush\\Downloads\\Coursework\\I 590 INTRO TO R\\datasets\\data_final\\LokSabha2019_xl.csv'
)
We know that “Assets - Liabilities = Equity” (More information on: Link)
# Bind the imported data to `data_copy`; the derived tables below start
# from this object.
data_copy <- data
# Calculating the equity for each party's candidates:
# equity = Total.Assets - Liabilities, computed row by row.
# Columns are referenced by bare name (data masking) instead of through
# `data_copy$...`, so the expression always uses the rows flowing through
# the pipeline and stays correct if a filter or grouping step is added.
# NOTE(review): `data_copy_` is not read again in the visible part of the
# script -- confirm whether it is still needed.
data_copy_ <- data_copy |>
  mutate(equity = Total.Assets - Liabilities)
# Age deviation of each candidate from the mean age of their
# (Party, Winner) group, truncated to a whole number by as.integer().
# na.rm = TRUE stops a single missing Age from turning every deviation in
# that group into NA; a candidate whose own Age is missing still gets NA.
# Only the columns needed to inspect the result are kept.
data_copy2 <- data_copy |>
  group_by(Party, Winner) |>
  mutate(Age_deviation = as.integer(Age - mean(Age, na.rm = TRUE))) |>
  ungroup() |>
  select(Candidate, Party, Age, Age_deviation, Winner)
# Rank candidates by equity (Total.Assets - Liabilities), wealthiest first,
# and sort the table by that rank.
# Columns are referenced by bare name (data masking) rather than through
# `data_copy$...`, and the ranking and sorting run in one pipeline instead
# of re-assigning `data_copy3` a second time.
data_copy3 <- data_copy |>
  mutate(
    Equity_amt = Total.Assets - Liabilities,
    # desc() flips the order so the largest equity receives rank 1.
    real_wealth_rank = rank(desc(Equity_amt))
  ) |>
  arrange(real_wealth_rank)
# One row per constituency: number of candidates recorded as 'M' and as 'F',
# plus the male-to-female ratio. The denominator is Female_Count + 1, so the
# ratio stays finite for constituencies without any female candidate.
data_copy4 <- data_copy |>
  group_by(Constituency) |>
  summarise(
    Male_Count = sum(Gender == "M"),
    Female_Count = sum(Gender == "F"),
    Male_to_Female_Ratio = Male_Count / (Female_Count + 1),
    .groups = "drop"
  )
# Jitter plot of the male-to-female ratio (x) for every constituency (y).
# Constituencies with a ratio above 24 are labelled in blue; the y-axis tick
# labels are hidden.
ggplot(
  data = data_copy4,
  mapping = aes(x = Male_to_Female_Ratio, y = Constituency)
) +
  geom_jitter() +
  geom_text_repel(
    # Only the high-ratio constituencies get a text label.
    data = filter(data_copy4, Male_to_Female_Ratio > 24),
    mapping = aes(label = Constituency),
    color = "Blue"
  ) +
  theme(axis.text.y = element_blank())
# Keep the constituencies whose male-to-female ratio exceeds 75 and show them.
outlier <- filter(data_copy4, Male_to_Female_Ratio > 75)
print(outlier)
## # A tibble: 1 × 4
## Constituency Male_Count Female_Count Male_to_Female_Ratio
## <chr> <int> <int> <dbl>
## 1 Nizamabad 182 1 91
# Mean equity over all candidates, ignoring missing values.
mean_equity_amt <- mean(data_copy3$Equity_amt, na.rm = TRUE)
# Label each candidate by whether their equity is strictly above that mean.
data_copy3 <- data_copy3 |>
  mutate(
    color_ = ifelse(Equity_amt > mean_equity_amt, "Above Mean", "Below Mean")
  )
# Jitter plot of equity by gender for winners only (Winner == 1),
# coloured by the above/below-mean label.
data_copy3 |>
  filter(Winner == 1) |>
  ggplot(mapping = aes(x = Equity_amt, y = Gender, color = color_)) +
  geom_jitter()
# Pairwise correlations between candidate attributes, using only the rows
# where both variables are present (use = "complete.obs").
# Age vs. number of criminal cases
with(data_copy3, cor(Age, Criminal.Cases, use = "complete.obs"))
## [1] 0.02234815
# Number of criminal cases vs. winning
with(data_copy3, cor(Criminal.Cases, Winner, use = "complete.obs"))
## [1] -0.02815683
# Age vs. total assets
with(data_copy3, cor(Age, Total.Assets, use = "complete.obs"))
## [1] 0.111774
# Age vs. equity
with(data_copy3, cor(Age, Equity_amt, use = "complete.obs"))
## [1] 0.1083645