library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
This analysis looks at whether voters would prefer Republicans or Democrats in Congress. The dataset is taken from FiveThirtyEight; the source page is https://projects.fivethirtyeight.com/polls/generic-ballot/
As I understand it, various polling organizations survey random samples of voters and record whether each respondent wants Republicans or Democrats in Congress.
First, I uploaded the data to GitHub, fetched it in raw format, and loaded it into a data frame. The dimensions of the data are 497 x 39; because the file is read with header = FALSE, the first of those rows holds the column headers rather than a poll.
# Download the generic-ballot poll export from GitHub. With header = FALSE the
# first line of the file is read as data and the columns get the default
# names V1, V2, ...
data <- read.csv(
  file = "https://raw.githubusercontent.com/karmaggyatso/CUNY_SPS/main/generic_ballot_polls.csv",
  header = FALSE,
  sep = ","
)
polls <- as.data.frame(data)

# Print the full table, then its dimensions (rows, columns).
polls
dim(polls)
## [1] 497 39
Created a subset of the data frame, selecting only the required columns from the original dataset.
# Keep only the eight columns used in the analysis and drop row 1 in the same
# indexing step (row 1 presumably holds the CSV header text, since the file
# was read with header = FALSE -- confirm against the raw file).
result <- polls[
  -1,
  c("V3", "V5", "V18", "V11", "V12", "V34", "V37", "V38")
]
result
Renamed the columns.
# Replace the default V* names with descriptive labels, in column order.
names(result) <- c(
  "Pollster", "Sponsers", "Sample Size", "State",
  "Start_date", "Election Date", "Dem", "Rep"
)
result
I used the mutate function to add a column to the subset data frame showing, for each poll, the margin between the two parties in percentage points. I used the case_when function to label which party is ahead. The as.numeric function had to be used because the values were of type character.
# Add a LeaderBoard column naming the leading party and its margin in
# percentage points (for example "Dem +4.5"). Polls where both parties have
# the same share are labelled "Even"; previously they matched neither branch
# and were left as NA.
# Dem and Rep are converted with as.numeric() once, into a temporary margin
# column (Dem minus Rep) that is dropped again after the label is built.
result <- result %>%
  mutate(
    margin = as.numeric(Dem) - as.numeric(Rep),
    LeaderBoard = case_when(
      margin > 0 ~ paste0("Dem +", margin),
      margin < 0 ~ paste0("Rep +", -margin),
      margin == 0 ~ "Even"
    )
  ) %>%
  select(-margin)
result
A graphical representation of the share of respondents favoring each party, plotted against the poll start date.
# Rescale both party columns from percentage points to proportions (0-1),
# converting them to numeric in the process.
result[c("Dem", "Rep")] <- lapply(
  result[c("Dem", "Rep")],
  function(share) as.numeric(share) / 100
)

# Show the Democratic shares formatted as percentages.
percent(result$Dem)
## [1] "42.00%" "46.40%" "41.40%" "46.00%" "47.00%" "47.00%" "45.00%" "43.60%"
## [9] "48.30%" "45.00%" "42.00%" "47.00%" "43.60%" "45.00%" "42.18%" "43.70%"
## [17] "50.00%" "48.00%" "38.00%" "45.00%" "47.00%" "47.20%" "41.00%" "41.60%"
## [25] "45.10%" "45.00%" "46.00%" "43.00%" "44.00%" "40.90%" "46.20%" "43.00%"
## [33] "44.30%" "41.00%" "44.00%" "45.00%" "44.00%" "35.00%" "44.80%" "43.00%"
## [41] "35.00%" "44.00%" "44.00%" "44.00%" "39.00%" "45.00%" "50.00%" "49.00%"
## [49] "30.00%" "45.00%" "43.00%" "41.70%" "45.90%" "50.00%" "44.00%" "41.00%"
## [57] "43.60%" "44.30%" "45.00%" "47.00%" "45.00%" "43.73%" "43.00%" "40.20%"
## [65] "39.00%" "43.70%" "46.70%" "48.00%" "42.60%" "48.00%" "49.00%" "43.00%"
## [73] "45.00%" "45.00%" "50.00%" "39.30%" "44.50%" "39.00%" "41.00%" "46.00%"
## [81] "43.00%" "37.00%" "43.00%" "46.00%" "42.00%" "46.00%" "40.00%" "41.00%"
## [89] "43.00%" "42.00%" "34.00%" "42.00%" "52.00%" "42.50%" "45.00%" "29.00%"
## [97] "40.50%" "47.40%" "41.00%" "41.00%" "50.00%" "42.00%" "43.30%" "40.00%"
## [105] "42.00%" "45.00%" "39.00%" "47.00%" "46.00%" "42.00%" "48.00%" "45.00%"
## [113] "45.00%" "39.80%" "40.00%" "44.00%" "42.58%" "41.00%" "47.00%" "47.00%"
## [121] "42.00%" "43.00%" "44.70%" "38.80%" "43.60%" "41.00%" "40.00%" "42.00%"
## [129] "44.00%" "38.00%" "43.00%" "44.00%" "39.00%" "47.00%" "39.00%" "43.00%"
## [137] "40.00%" "41.00%" "27.00%" "42.00%" "37.40%" "43.00%" "40.00%" "43.00%"
## [145] "44.00%" "41.00%" "42.30%" "41.00%" "46.00%" "45.00%" "45.00%" "42.00%"
## [153] "38.00%" "37.30%" "42.90%" "41.15%" "49.00%" "39.00%" "43.00%" "44.00%"
## [161] "42.00%" "44.00%" "42.00%" "43.00%" "45.10%" "42.00%" "47.00%" "51.00%"
## [169] "46.00%" "44.00%" "44.00%" "43.00%" "44.00%" "44.00%" "39.20%" "45.30%"
## [177] "42.00%" "43.00%" "37.00%" "42.00%" "27.00%" "43.00%" "53.00%" "43.00%"
## [185] "39.00%" "44.00%" "44.00%" "46.00%" "42.10%" "39.00%" "40.60%" "43.00%"
## [193] "44.00%" "43.00%" "40.00%" "42.00%" "39.00%" "42.00%" "37.00%" "44.20%"
## [201] "37.00%" "44.00%" "50.00%" "39.40%" "45.00%" "46.00%" "42.70%" "39.70%"
## [209] "45.00%" "49.00%" "43.00%" "39.00%" "41.60%" "40.00%" "40.70%" "44.00%"
## [217] "41.90%" "43.00%" "36.00%" "44.00%" "43.00%" "43.00%" "43.00%" "40.70%"
## [225] "42.00%" "37.80%" "45.00%" "34.00%" "47.00%" "39.31%" "43.10%" "44.00%"
## [233] "44.00%" "42.00%" "45.00%" "45.00%" "41.00%" "42.00%" "50.63%" "41.80%"
## [241] "45.90%" "44.00%" "39.00%" "39.80%" "44.00%" "42.00%" "36.00%" "42.00%"
## [249] "46.00%" "46.00%" "40.00%" "43.00%" "36.50%" "41.90%" "40.00%" "44.30%"
## [257] "41.00%" "43.00%" "42.00%" "42.20%" "42.00%" "36.00%" "42.00%" "38.50%"
## [265] "44.50%" "49.00%" "38.92%" "45.00%" "42.00%" "45.00%" "39.00%" "41.00%"
## [273] "43.90%" "45.00%" "45.00%" "45.00%" "41.40%" "38.90%" "37.00%" "44.00%"
## [281] "42.20%" "43.00%" "44.00%" "41.00%" "39.20%" "44.50%" "42.00%" "41.00%"
## [289] "42.60%" "43.00%" "42.00%" "42.00%" "41.90%" "43.00%" "44.00%" "44.00%"
## [297] "38.10%" "44.10%" "43.00%" "44.80%" "42.00%" "43.70%" "43.00%" "43.00%"
## [305] "42.00%" "42.00%" "46.00%" "47.00%" "43.00%" "42.90%" "47.00%" "43.00%"
## [313] "43.00%" "41.00%" "42.40%" "39.80%" "42.50%" "39.00%" "42.40%" "39.70%"
## [321] "42.00%" "43.00%" "44.00%" "40.00%" "42.30%" "44.00%" "38.50%" "42.50%"
## [329] "43.00%" "41.00%" "45.00%" "42.00%" "42.00%" "43.00%" "43.50%" "40.00%"
## [337] "42.00%" "39.00%" "39.00%" "43.00%" "47.00%" "39.00%" "44.00%" "38.00%"
## [345] "45.00%" "42.00%" "44.00%" "34.00%" "50.00%" "37.00%" "35.00%" "42.00%"
## [353] "40.00%" "41.00%" "42.00%" "38.00%" "43.00%" "46.00%" "46.00%" "49.00%"
## [361] "42.00%" "44.00%" "43.00%" "38.00%" "41.00%" "38.00%" "43.00%" "41.00%"
## [369] "44.00%" "32.00%" "36.00%" "44.00%" "37.70%" "42.00%" "50.00%" "49.00%"
## [377] "44.00%" "42.00%" "40.00%" "42.00%" "44.00%" "47.00%" "43.00%" "44.00%"
## [385] "44.00%" "37.00%" "42.00%" "46.00%" "46.00%" "45.00%" "47.00%" "43.00%"
## [393] "39.00%" "40.00%" "45.00%" "40.00%" "43.00%" "40.00%" "43.00%" "43.00%"
## [401] "43.00%" "44.00%" "37.00%" "37.00%" "52.00%" "45.00%" "46.00%" "45.00%"
## [409] "47.00%" "50.00%" "41.00%" "52.00%" "44.00%" "45.00%" "46.00%" "43.00%"
## [417] "45.00%" "47.00%" "43.00%" "40.00%" "46.00%" "42.00%" "43.00%" "45.00%"
## [425] "45.00%" "46.00%" "49.00%" "45.00%" "40.00%" "44.00%" "50.00%" "38.00%"
## [433] "46.00%" "43.00%" "47.00%" "40.00%" "45.00%" "42.00%" "45.00%" "46.00%"
## [441] "45.00%" "45.00%" "44.00%" "43.00%" "41.00%" "48.00%" "44.00%" "44.00%"
## [449] "41.70%" "43.00%" "44.00%" "41.00%" "44.00%" "46.00%" "46.00%" "46.00%"
## [457] "37.00%" "41.00%" "46.00%" "46.00%" "46.00%" "41.00%" "43.00%" "46.00%"
## [465] "39.00%" "49.00%" "50.00%" "47.00%" "46.00%" "50.00%" "45.00%" "41.00%"
## [473] "44.00%" "43.90%" "41.00%" "43.00%" "45.00%" "46.00%" "45.00%" "47.00%"
## [481] "48.00%" "43.00%" "39.00%" "46.00%" "45.00%" "39.80%" "46.00%" "39.00%"
## [489] "46.00%" "39.00%" "48.00%" "48.00%" "47.00%" "44.00%" "46.00%" "45.00%"
# Show the Republican shares formatted as percentages (Rep is already numeric
# after the rescaling step above).
percent(result$Rep)
## [1] "47.00%" "37.70%" "47.20%" "42.00%" "43.00%" "42.00%" "46.00%" "35.70%"
## [9] "40.80%" "47.00%" "47.00%" "44.00%" "44.60%" "45.00%" "44.98%" "38.50%"
## [17] "42.00%" "44.00%" "33.00%" "39.00%" "42.00%" "46.50%" "46.00%" "46.90%"
## [25] "38.70%" "47.00%" "42.00%" "47.00%" "42.00%" "37.20%" "41.80%" "46.00%"
## [33] "38.40%" "41.00%" "42.00%" "44.00%" "43.00%" "31.00%" "44.30%" "46.00%"
## [41] "33.00%" "48.00%" "38.70%" "45.00%" "32.00%" "39.00%" "43.00%" "46.00%"
## [49] "30.00%" "43.00%" "45.00%" "37.50%" "42.30%" "50.00%" "45.00%" "46.00%"
## [57] "38.30%" "40.10%" "48.00%" "41.00%" "41.00%" "42.52%" "48.20%" "48.30%"
## [65] "49.00%" "44.70%" "47.30%" "46.00%" "40.10%" "44.00%" "42.00%" "44.00%"
## [73] "44.00%" "41.00%" "48.00%" "38.00%" "40.70%" "47.00%" "44.00%" "46.00%"
## [81] "39.60%" "32.00%" "39.00%" "42.00%" "44.00%" "41.00%" "48.00%" "40.00%"
## [89] "44.00%" "47.00%" "35.00%" "50.00%" "48.00%" "40.40%" "46.00%" "29.00%"
## [97] "36.30%" "41.60%" "45.00%" "49.00%" "50.00%" "47.00%" "45.70%" "45.00%"
## [105] "46.00%" "38.00%" "32.00%" "47.00%" "48.00%" "39.00%" "41.00%" "42.00%"
## [113] "46.00%" "49.40%" "48.00%" "48.00%" "42.76%" "45.00%" "46.00%" "45.00%"
## [121] "42.00%" "47.00%" "48.90%" "36.90%" "42.20%" "46.00%" "39.50%" "46.00%"
## [129] "47.00%" "34.00%" "39.00%" "42.00%" "48.00%" "44.00%" "37.00%" "45.00%"
## [137] "44.00%" "46.00%" "29.00%" "42.00%" "38.00%" "43.80%" "48.00%" "43.00%"
## [145] "45.00%" "47.00%" "45.20%" "45.00%" "43.00%" "45.00%" "41.00%" "38.00%"
## [153] "36.00%" "37.60%" "43.20%" "43.27%" "51.00%" "48.00%" "40.00%" "42.00%"
## [161] "41.00%" "42.00%" "47.00%" "47.00%" "47.80%" "47.00%" "42.00%" "49.00%"
## [169] "46.00%" "42.00%" "48.00%" "50.00%" "38.00%" "45.00%" "36.80%" "42.00%"
## [177] "43.00%" "39.00%" "33.00%" "49.00%" "29.00%" "43.00%" "47.00%" "43.00%"
## [185] "46.00%" "45.00%" "45.00%" "45.00%" "39.30%" "49.00%" "46.80%" "48.00%"
## [193] "47.00%" "42.00%" "44.00%" "45.00%" "51.00%" "36.00%" "37.10%" "42.20%"
## [201] "34.00%" "40.00%" "50.00%" "48.10%" "47.00%" "46.00%" "38.20%" "40.70%"
## [209] "47.40%" "51.00%" "42.00%" "47.00%" "39.20%" "43.00%" "35.90%" "43.10%"
## [217] "37.70%" "43.00%" "34.00%" "41.00%" "44.00%" "47.00%" "46.00%" "38.60%"
## [225] "41.00%" "38.40%" "42.30%" "40.00%" "53.00%" "40.96%" "38.70%" "46.00%"
## [233] "48.00%" "41.00%" "45.00%" "44.00%" "43.00%" "40.00%" "49.28%" "47.10%"
## [241] "46.70%" "47.00%" "50.00%" "40.60%" "45.00%" "43.00%" "33.00%" "38.00%"
## [249] "45.00%" "46.00%" "41.00%" "43.00%" "38.50%" "44.40%" "48.00%" "38.80%"
## [257] "46.00%" "41.00%" "43.00%" "38.30%" "41.00%" "33.00%" "39.00%" "36.70%"
## [265] "41.20%" "51.00%" "41.94%" "43.00%" "49.00%" "44.00%" "40.00%" "42.00%"
## [273] "37.60%" "49.00%" "47.00%" "41.00%" "49.90%" "37.30%" "50.00%" "44.00%"
## [281] "37.20%" "45.00%" "46.00%" "43.00%" "35.50%" "41.50%" "45.00%" "47.00%"
## [289] "39.30%" "41.00%" "34.00%" "43.00%" "54.40%" "44.00%" "38.00%" "41.00%"
## [297] "37.40%" "43.80%" "47.00%" "47.10%" "38.50%" "41.90%" "50.00%" "51.00%"
## [305] "42.00%" "49.00%" "41.00%" "53.00%" "44.00%" "38.60%" "46.00%" "48.00%"
## [313] "44.00%" "42.00%" "55.60%" "34.70%" "41.60%" "48.00%" "37.20%" "45.20%"
## [321] "43.00%" "44.00%" "41.00%" "42.00%" "37.80%" "46.00%" "37.30%" "42.80%"
## [329] "36.00%" "41.00%" "44.00%" "37.00%" "40.00%" "48.00%" "39.60%" "33.00%"
## [337] "38.00%" "43.00%" "48.00%" "42.00%" "45.00%" "31.00%" "35.00%" "44.00%"
## [345] "48.00%" "39.00%" "41.00%" "44.00%" "50.00%" "45.00%" "38.00%" "37.00%"
## [353] "40.00%" "44.00%" "40.00%" "32.00%" "39.00%" "41.00%" "44.00%" "44.00%"
## [361] "38.00%" "48.00%" "43.00%" "46.00%" "46.00%" "51.00%" "46.00%" "51.00%"
## [369] "37.00%" "29.00%" "33.00%" "42.00%" "45.90%" "48.70%" "40.00%" "44.00%"
## [377] "37.00%" "42.00%" "41.00%" "45.00%" "38.00%" "45.00%" "43.00%" "42.00%"
## [385] "41.00%" "32.00%" "37.00%" "42.00%" "45.00%" "40.00%" "48.00%" "41.00%"
## [393] "49.00%" "41.00%" "37.00%" "40.00%" "41.00%" "41.00%" "36.00%" "42.00%"
## [401] "46.00%" "47.00%" "31.00%" "29.00%" "48.00%" "41.00%" "38.00%" "41.00%"
## [409] "40.00%" "40.00%" "40.00%" "48.00%" "40.00%" "40.00%" "47.00%" "39.00%"
## [417] "42.00%" "43.00%" "37.00%" "40.00%" "43.00%" "42.00%" "37.00%" "40.00%"
## [425] "44.00%" "45.00%" "45.00%" "38.00%" "40.00%" "37.00%" "40.00%" "39.00%"
## [433] "48.00%" "38.00%" "46.00%" "37.00%" "37.00%" "40.00%" "37.00%" "48.00%"
## [441] "42.00%" "44.00%" "42.00%" "36.00%" "38.00%" "41.00%" "35.00%" "37.00%"
## [449] "42.10%" "37.00%" "36.00%" "38.00%" "37.00%" "42.00%" "44.00%" "47.00%"
## [457] "30.00%" "34.00%" "42.00%" "37.00%" "46.00%" "37.00%" "35.00%" "37.00%"
## [465] "37.00%" "40.00%" "41.00%" "38.00%" "47.00%" "40.00%" "39.00%" "36.00%"
## [473] "37.00%" "41.40%" "40.00%" "38.00%" "38.00%" "42.00%" "37.00%" "42.00%"
## [481] "43.00%" "39.00%" "31.00%" "46.40%" "41.00%" "42.70%" "42.00%" "31.00%"
## [489] "46.00%" "38.00%" "42.00%" "42.00%" "43.00%" "44.00%" "48.00%" "47.00%"
# Scatter plot of each poll's Democratic (blue) and Republican (red) share
# against its start date, with the y axis shown as 0%-100%.
# as.numeric() is kept inside aes() so the default y-axis title is unchanged.
# NOTE(review): Start_date is drawn on a discrete axis and is never converted
# to Date in this script, so points are presumably ordered alphabetically
# rather than chronologically -- confirm.
ggplot(data = result, mapping = aes(x = Start_date)) +
  geom_point(mapping = aes(y = as.numeric(Dem)), colour = "blue") +
  geom_point(mapping = aes(y = as.numeric(Rep)), colour = "red") +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_y_continuous(limits = c(0, 1), labels = percent)
class(result$Dem)
## [1] "numeric"
# Dem and Rep were rescaled to proportions (0-1) earlier in the script, so
# rounding the raw mean always printed 0. Convert back to percentage points
# before rounding so the result is the mean support as a whole percent.
round(mean(result$Dem) * 100)
## [1] 43
round(mean(result$Rep) * 100)
## [1] 42
According to the polls, the mean support for Democrats is 43% and for Republicans is 42%. The difference is 1 percentage point, which suggests that slightly more respondents want Democrats in Congress.