Dataset: Loksabha 2019 Candidates General Information. (https://www.kaggle.com/datasets/themlphdstudent/lok-sabha-election-candidate-list-2004-to-2019)
# Importing required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggrepel)
library(ggthemes)
# Read the Lok Sabha 2019 candidate table from the local CSV export and
# preview its first rows.
data <- read.csv(
  file = "C:\\Users\\bhush\\Downloads\\Coursework\\I 590 INTRO TO R\\datasets\\data_final\\LokSabha2019_xl.csv"
)
head(x = data)
## Candidate Party Criminal.Cases
## 1 Kuldeep Rai Sharma INC 0
## 2 Ayan Mandal AITC 0
## 3 C G Saji Kumar All India Hindustan Congress Party 0
## 4 C U Rasheed IND 0
## 5 Gour Chandra Majumder IND 0
## 6 Henry IND 0
## Education Age Total.Assets Constituency
## 1 Graduate Professional 52 132233012 Andaman And Nicobar Islands
## 2 Graduate 30 7270440 Andaman And Nicobar Islands
## 3 12th Pass 48 120000 Andaman And Nicobar Islands
## 4 12th Pass 34 202808 Andaman And Nicobar Islands
## 5 Graduate 52 6062000 Andaman And Nicobar Islands
## 6 10th Pass 50 56459 Andaman And Nicobar Islands
## Liabilities Winner Gender
## 1 80450870 1 M
## 2 1500000 0 M
## 3 0 0 M
## 4 1700000 0 M
## 5 0 0 M
## 6 0 0 M
We fetch Wikipedia page views for election-related articles over the period from 2014 to 2020:
# Fetch English-Wikipedia page-view counts for four election-related
# articles between 2014-01-01 and 2020-12-31.
hits <- wp_trend(
  page = c("Indian elections", "Loksabha", "Modi", "BJP"),
  from = "2014-01-01",
  to = "2020-12-31",
  lang = "en",
  warn = TRUE
)

# Collapse the four articles into a single series: total views per date,
# tagged with one common article label. (Labelling before the aggregation is
# pointless because summarise() keeps only `date` and `views`.)
hits <- hits |>
  group_by(date) |>
  summarise(views = sum(views)) |>
  ungroup() |>
  mutate(article = "Indian election") |>
  select(article, date, views)

# tsibble needs a proper Date index.
hits$date <- as.Date(hits$date)
hits_ts <- as_tsibble(hits, index = date)
# Line chart of the combined page views over the whole downloaded range.
ggplot(hits_ts, aes(x = date, y = views)) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0.
  geom_line(linewidth = 1.5, colour = "steelblue") +
  theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Average the views within each calendar month and plot the monthly series
# together with a loess smoother.
hits_ts |>
  index_by(month = floor_date(date, "month")) |>
  summarise(avg_views = mean(views)) |>
  ggplot(mapping = aes(x = month, y = avg_views)) +
  geom_line() +
  geom_smooth(span = 0.3, color = "blue", se = FALSE) +
  labs(
    title = "Indian elections",
    subtitle = "(by each month)"
  ) +
  # Label ticks with month and year: a bare month number is ambiguous on an
  # axis that spans several years. Labels are rotated so they do not overlap.
  scale_x_date(
    date_breaks = "5 month",
    date_labels = "%b %Y",
    guide = guide_axis(angle = 45)
  ) +
  theme_hc()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Average the views within each week and plot the weekly series together
# with a loess smoother.
hits_ts |>
  index_by(week = floor_date(date, "week")) |>
  summarise(avg_views = mean(views)) |>
  ggplot(mapping = aes(x = week, y = avg_views)) +
  geom_line() +
  geom_smooth(span = 0.3, color = "blue", se = FALSE) +
  labs(
    title = "Indian elections",
    subtitle = "(by weeks)"
  ) +
  # Prefix the week number with its year: week numbers alone repeat every
  # year on this multi-year axis. Labels are rotated so they do not overlap.
  scale_x_date(
    date_breaks = "20 week",
    labels = \(x) paste0(year(x), "-W", week(x)),
    guide = guide_axis(angle = 45)
  ) +
  theme_hc()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
There is a large spike in the months when the election dates are announced and the elections draw closer.
# Views from January to May 2014 with a fitted linear trend line.
hits_ts |>
  filter_index("2014-01" ~ "2014-05") |>
  ggplot(mapping = aes(x = date, y = views)) +
  geom_line() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Indian Elections",
    # The subtitle names the window so this plot can be told apart from the
    # other period plots, which carry the same title.
    subtitle = "January-May 2014"
  ) +
  theme_hc()
## `geom_smooth()` using formula = 'y ~ x'
# Views from January to May 2019 with a fitted linear trend line.
hits_ts |>
  filter_index("2019-01" ~ "2019-05") |>
  ggplot(mapping = aes(x = date, y = views)) +
  geom_line() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Indian Elections",
    # The subtitle names the window so this plot can be told apart from the
    # other period plots, which carry the same title.
    subtitle = "January-May 2019"
  ) +
  theme_hc()
## `geom_smooth()` using formula = 'y ~ x'
# Views from June to December 2014 with a fitted linear trend line.
hits_ts |>
  filter_index("2014-06" ~ "2014-12") |>
  ggplot(mapping = aes(x = date, y = views)) +
  geom_line() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Indian Elections",
    # The subtitle names the window so this plot can be told apart from the
    # other period plots, which carry the same title.
    subtitle = "June-December 2014"
  ) +
  theme_hc()
## `geom_smooth()` using formula = 'y ~ x'
# Views from June to December 2019 with a fitted linear trend line.
hits_ts |>
  filter_index("2019-06" ~ "2019-12") |>
  ggplot(mapping = aes(x = date, y = views)) +
  geom_line() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Indian Elections",
    # The subtitle names the window so this plot can be told apart from the
    # other period plots, which carry the same title.
    subtitle = "June-December 2019"
  ) +
  theme_hc()
## `geom_smooth()` using formula = 'y ~ x'
We compare the five months leading up to the election results (January–May) with the months that follow them (June–December). In the first window the trend line clearly rises as election day and the results approach.
In the months after the election results, on the other hand, the trend line has a negative or flat slope.
# Average the views within each calendar month and plot the monthly series
# together with a loess smoother.
hits_ts |>
  index_by(month = floor_date(date, "month")) |>
  summarise(avg_views = mean(views)) |>
  ggplot(mapping = aes(x = month, y = avg_views)) +
  geom_line() +
  geom_smooth(span = 0.3, color = "blue", se = FALSE) +
  labs(
    title = "Indian elections",
    subtitle = "(by each month)"
  ) +
  # Label ticks with month and year: a bare month number is ambiguous on an
  # axis that spans several years. Labels are rotated so they do not overlap.
  scale_x_date(
    date_breaks = "5 month",
    date_labels = "%b %Y",
    guide = guide_axis(angle = 45)
  ) +
  theme_hc()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
We can see two distinct peaks, in the 5th month of 2014 and of 2019. These were the peak times of the elections, and the two peaks are 5 years apart.
Similarly, for 2009 we can expect the same kind of peak in the line plot.