library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)

This analysis examines whether voters would prefer Republicans or Democrats in Congress. The dataset is taken from FiveThirtyEight; the source page is https://projects.fivethirtyeight.com/polls/generic-ballot/

As I understand it, various polling organizations survey random samples of voters and record whether each respondent wants Republicans or Democrats in Congress.

Getting started

First, I uploaded the data to GitHub, fetched it in raw format, and loaded it into a data frame. Because the file is read with header = FALSE, the header line is kept as the first row, so the data frame has 497 rows and 39 columns.

# Read the raw CSV straight from GitHub. With header = FALSE the file's header
# line is kept as the first data row and the columns are named V1, V2, ...
data <- read.csv(
  "https://raw.githubusercontent.com/karmaggyatso/CUNY_SPS/main/generic_ballot_polls.csv",
  header = FALSE,
  sep = ","
)
polls <- data.frame(data)

# Show the full table, then its dimensions (rows, columns).
polls
c(nrow(polls), ncol(polls))
## [1] 497  39

Subset

Created a subset of the data frame that keeps only the required columns, and dropped the first row, which holds the original column headers.

# Keep only the columns needed for the analysis (addressed by their
# auto-generated V<n> names) and drop row 1 in the same indexing step.
result <- polls[-1, c("V3", "V5", "V18", "V11", "V12", "V34", "V37", "V38")]
result

Renaming the columns

Renamed the columns to descriptive names.

# Replace the V<n> placeholders with descriptive names, in the same order as
# the columns were selected into `result`.
names(result) <- c(
  "Pollster", "Sponsers", "Sample Size", "State",
  "Start_date", "Election Date", "Dem", "Rep"
)
result

Adding a column and data in the column

I used the mutate function to add a column to the subset data frame showing each poll's margin, in percentage points, between the two parties. The case_when function labels the margin with the leading party. The as.numeric function had to be used because the values were of type character.

# Add a LeaderBoard column naming the leading party and its margin in
# percentage points (e.g. "Dem +8.7").
# Dem and Rep are character at this point, so they are converted once into a
# temporary numeric margin column that is dropped again afterwards.
result <- result %>%
  mutate(
    .margin = as.numeric(Dem) - as.numeric(Rep),
    LeaderBoard = case_when(
      .margin > 0 ~ paste0("Dem +", .margin),
      .margin < 0 ~ paste0("Rep +", abs(.margin)),
      # Tied polls used to match no branch and were silently left as NA.
      .margin == 0 ~ "Even"
      # Rows where Dem or Rep is not numeric still yield NA.
    )
  ) %>%
  select(-.margin)
result

Graph

A plot of the share of respondents favoring each party, arranged by poll start date.

# Convert both party columns from character percentage points (e.g. "42")
# to numeric fractions (0.42), the scale that percent() formats as "42.00%".
result[["Dem"]] <- as.numeric(result[["Dem"]]) / 100
result[["Rep"]] <- as.numeric(result[["Rep"]]) / 100

# Print the Democratic share of every poll as a formatted percentage.
percent(result[["Dem"]])
##   [1] "42.00%" "46.40%" "41.40%" "46.00%" "47.00%" "47.00%" "45.00%" "43.60%"
##   [9] "48.30%" "45.00%" "42.00%" "47.00%" "43.60%" "45.00%" "42.18%" "43.70%"
##  [17] "50.00%" "48.00%" "38.00%" "45.00%" "47.00%" "47.20%" "41.00%" "41.60%"
##  [25] "45.10%" "45.00%" "46.00%" "43.00%" "44.00%" "40.90%" "46.20%" "43.00%"
##  [33] "44.30%" "41.00%" "44.00%" "45.00%" "44.00%" "35.00%" "44.80%" "43.00%"
##  [41] "35.00%" "44.00%" "44.00%" "44.00%" "39.00%" "45.00%" "50.00%" "49.00%"
##  [49] "30.00%" "45.00%" "43.00%" "41.70%" "45.90%" "50.00%" "44.00%" "41.00%"
##  [57] "43.60%" "44.30%" "45.00%" "47.00%" "45.00%" "43.73%" "43.00%" "40.20%"
##  [65] "39.00%" "43.70%" "46.70%" "48.00%" "42.60%" "48.00%" "49.00%" "43.00%"
##  [73] "45.00%" "45.00%" "50.00%" "39.30%" "44.50%" "39.00%" "41.00%" "46.00%"
##  [81] "43.00%" "37.00%" "43.00%" "46.00%" "42.00%" "46.00%" "40.00%" "41.00%"
##  [89] "43.00%" "42.00%" "34.00%" "42.00%" "52.00%" "42.50%" "45.00%" "29.00%"
##  [97] "40.50%" "47.40%" "41.00%" "41.00%" "50.00%" "42.00%" "43.30%" "40.00%"
## [105] "42.00%" "45.00%" "39.00%" "47.00%" "46.00%" "42.00%" "48.00%" "45.00%"
## [113] "45.00%" "39.80%" "40.00%" "44.00%" "42.58%" "41.00%" "47.00%" "47.00%"
## [121] "42.00%" "43.00%" "44.70%" "38.80%" "43.60%" "41.00%" "40.00%" "42.00%"
## [129] "44.00%" "38.00%" "43.00%" "44.00%" "39.00%" "47.00%" "39.00%" "43.00%"
## [137] "40.00%" "41.00%" "27.00%" "42.00%" "37.40%" "43.00%" "40.00%" "43.00%"
## [145] "44.00%" "41.00%" "42.30%" "41.00%" "46.00%" "45.00%" "45.00%" "42.00%"
## [153] "38.00%" "37.30%" "42.90%" "41.15%" "49.00%" "39.00%" "43.00%" "44.00%"
## [161] "42.00%" "44.00%" "42.00%" "43.00%" "45.10%" "42.00%" "47.00%" "51.00%"
## [169] "46.00%" "44.00%" "44.00%" "43.00%" "44.00%" "44.00%" "39.20%" "45.30%"
## [177] "42.00%" "43.00%" "37.00%" "42.00%" "27.00%" "43.00%" "53.00%" "43.00%"
## [185] "39.00%" "44.00%" "44.00%" "46.00%" "42.10%" "39.00%" "40.60%" "43.00%"
## [193] "44.00%" "43.00%" "40.00%" "42.00%" "39.00%" "42.00%" "37.00%" "44.20%"
## [201] "37.00%" "44.00%" "50.00%" "39.40%" "45.00%" "46.00%" "42.70%" "39.70%"
## [209] "45.00%" "49.00%" "43.00%" "39.00%" "41.60%" "40.00%" "40.70%" "44.00%"
## [217] "41.90%" "43.00%" "36.00%" "44.00%" "43.00%" "43.00%" "43.00%" "40.70%"
## [225] "42.00%" "37.80%" "45.00%" "34.00%" "47.00%" "39.31%" "43.10%" "44.00%"
## [233] "44.00%" "42.00%" "45.00%" "45.00%" "41.00%" "42.00%" "50.63%" "41.80%"
## [241] "45.90%" "44.00%" "39.00%" "39.80%" "44.00%" "42.00%" "36.00%" "42.00%"
## [249] "46.00%" "46.00%" "40.00%" "43.00%" "36.50%" "41.90%" "40.00%" "44.30%"
## [257] "41.00%" "43.00%" "42.00%" "42.20%" "42.00%" "36.00%" "42.00%" "38.50%"
## [265] "44.50%" "49.00%" "38.92%" "45.00%" "42.00%" "45.00%" "39.00%" "41.00%"
## [273] "43.90%" "45.00%" "45.00%" "45.00%" "41.40%" "38.90%" "37.00%" "44.00%"
## [281] "42.20%" "43.00%" "44.00%" "41.00%" "39.20%" "44.50%" "42.00%" "41.00%"
## [289] "42.60%" "43.00%" "42.00%" "42.00%" "41.90%" "43.00%" "44.00%" "44.00%"
## [297] "38.10%" "44.10%" "43.00%" "44.80%" "42.00%" "43.70%" "43.00%" "43.00%"
## [305] "42.00%" "42.00%" "46.00%" "47.00%" "43.00%" "42.90%" "47.00%" "43.00%"
## [313] "43.00%" "41.00%" "42.40%" "39.80%" "42.50%" "39.00%" "42.40%" "39.70%"
## [321] "42.00%" "43.00%" "44.00%" "40.00%" "42.30%" "44.00%" "38.50%" "42.50%"
## [329] "43.00%" "41.00%" "45.00%" "42.00%" "42.00%" "43.00%" "43.50%" "40.00%"
## [337] "42.00%" "39.00%" "39.00%" "43.00%" "47.00%" "39.00%" "44.00%" "38.00%"
## [345] "45.00%" "42.00%" "44.00%" "34.00%" "50.00%" "37.00%" "35.00%" "42.00%"
## [353] "40.00%" "41.00%" "42.00%" "38.00%" "43.00%" "46.00%" "46.00%" "49.00%"
## [361] "42.00%" "44.00%" "43.00%" "38.00%" "41.00%" "38.00%" "43.00%" "41.00%"
## [369] "44.00%" "32.00%" "36.00%" "44.00%" "37.70%" "42.00%" "50.00%" "49.00%"
## [377] "44.00%" "42.00%" "40.00%" "42.00%" "44.00%" "47.00%" "43.00%" "44.00%"
## [385] "44.00%" "37.00%" "42.00%" "46.00%" "46.00%" "45.00%" "47.00%" "43.00%"
## [393] "39.00%" "40.00%" "45.00%" "40.00%" "43.00%" "40.00%" "43.00%" "43.00%"
## [401] "43.00%" "44.00%" "37.00%" "37.00%" "52.00%" "45.00%" "46.00%" "45.00%"
## [409] "47.00%" "50.00%" "41.00%" "52.00%" "44.00%" "45.00%" "46.00%" "43.00%"
## [417] "45.00%" "47.00%" "43.00%" "40.00%" "46.00%" "42.00%" "43.00%" "45.00%"
## [425] "45.00%" "46.00%" "49.00%" "45.00%" "40.00%" "44.00%" "50.00%" "38.00%"
## [433] "46.00%" "43.00%" "47.00%" "40.00%" "45.00%" "42.00%" "45.00%" "46.00%"
## [441] "45.00%" "45.00%" "44.00%" "43.00%" "41.00%" "48.00%" "44.00%" "44.00%"
## [449] "41.70%" "43.00%" "44.00%" "41.00%" "44.00%" "46.00%" "46.00%" "46.00%"
## [457] "37.00%" "41.00%" "46.00%" "46.00%" "46.00%" "41.00%" "43.00%" "46.00%"
## [465] "39.00%" "49.00%" "50.00%" "47.00%" "46.00%" "50.00%" "45.00%" "41.00%"
## [473] "44.00%" "43.90%" "41.00%" "43.00%" "45.00%" "46.00%" "45.00%" "47.00%"
## [481] "48.00%" "43.00%" "39.00%" "46.00%" "45.00%" "39.80%" "46.00%" "39.00%"
## [489] "46.00%" "39.00%" "48.00%" "48.00%" "47.00%" "44.00%" "46.00%" "45.00%"
# Print the Republican share of every poll as a formatted percentage.
percent(result[["Rep"]])
##   [1] "47.00%" "37.70%" "47.20%" "42.00%" "43.00%" "42.00%" "46.00%" "35.70%"
##   [9] "40.80%" "47.00%" "47.00%" "44.00%" "44.60%" "45.00%" "44.98%" "38.50%"
##  [17] "42.00%" "44.00%" "33.00%" "39.00%" "42.00%" "46.50%" "46.00%" "46.90%"
##  [25] "38.70%" "47.00%" "42.00%" "47.00%" "42.00%" "37.20%" "41.80%" "46.00%"
##  [33] "38.40%" "41.00%" "42.00%" "44.00%" "43.00%" "31.00%" "44.30%" "46.00%"
##  [41] "33.00%" "48.00%" "38.70%" "45.00%" "32.00%" "39.00%" "43.00%" "46.00%"
##  [49] "30.00%" "43.00%" "45.00%" "37.50%" "42.30%" "50.00%" "45.00%" "46.00%"
##  [57] "38.30%" "40.10%" "48.00%" "41.00%" "41.00%" "42.52%" "48.20%" "48.30%"
##  [65] "49.00%" "44.70%" "47.30%" "46.00%" "40.10%" "44.00%" "42.00%" "44.00%"
##  [73] "44.00%" "41.00%" "48.00%" "38.00%" "40.70%" "47.00%" "44.00%" "46.00%"
##  [81] "39.60%" "32.00%" "39.00%" "42.00%" "44.00%" "41.00%" "48.00%" "40.00%"
##  [89] "44.00%" "47.00%" "35.00%" "50.00%" "48.00%" "40.40%" "46.00%" "29.00%"
##  [97] "36.30%" "41.60%" "45.00%" "49.00%" "50.00%" "47.00%" "45.70%" "45.00%"
## [105] "46.00%" "38.00%" "32.00%" "47.00%" "48.00%" "39.00%" "41.00%" "42.00%"
## [113] "46.00%" "49.40%" "48.00%" "48.00%" "42.76%" "45.00%" "46.00%" "45.00%"
## [121] "42.00%" "47.00%" "48.90%" "36.90%" "42.20%" "46.00%" "39.50%" "46.00%"
## [129] "47.00%" "34.00%" "39.00%" "42.00%" "48.00%" "44.00%" "37.00%" "45.00%"
## [137] "44.00%" "46.00%" "29.00%" "42.00%" "38.00%" "43.80%" "48.00%" "43.00%"
## [145] "45.00%" "47.00%" "45.20%" "45.00%" "43.00%" "45.00%" "41.00%" "38.00%"
## [153] "36.00%" "37.60%" "43.20%" "43.27%" "51.00%" "48.00%" "40.00%" "42.00%"
## [161] "41.00%" "42.00%" "47.00%" "47.00%" "47.80%" "47.00%" "42.00%" "49.00%"
## [169] "46.00%" "42.00%" "48.00%" "50.00%" "38.00%" "45.00%" "36.80%" "42.00%"
## [177] "43.00%" "39.00%" "33.00%" "49.00%" "29.00%" "43.00%" "47.00%" "43.00%"
## [185] "46.00%" "45.00%" "45.00%" "45.00%" "39.30%" "49.00%" "46.80%" "48.00%"
## [193] "47.00%" "42.00%" "44.00%" "45.00%" "51.00%" "36.00%" "37.10%" "42.20%"
## [201] "34.00%" "40.00%" "50.00%" "48.10%" "47.00%" "46.00%" "38.20%" "40.70%"
## [209] "47.40%" "51.00%" "42.00%" "47.00%" "39.20%" "43.00%" "35.90%" "43.10%"
## [217] "37.70%" "43.00%" "34.00%" "41.00%" "44.00%" "47.00%" "46.00%" "38.60%"
## [225] "41.00%" "38.40%" "42.30%" "40.00%" "53.00%" "40.96%" "38.70%" "46.00%"
## [233] "48.00%" "41.00%" "45.00%" "44.00%" "43.00%" "40.00%" "49.28%" "47.10%"
## [241] "46.70%" "47.00%" "50.00%" "40.60%" "45.00%" "43.00%" "33.00%" "38.00%"
## [249] "45.00%" "46.00%" "41.00%" "43.00%" "38.50%" "44.40%" "48.00%" "38.80%"
## [257] "46.00%" "41.00%" "43.00%" "38.30%" "41.00%" "33.00%" "39.00%" "36.70%"
## [265] "41.20%" "51.00%" "41.94%" "43.00%" "49.00%" "44.00%" "40.00%" "42.00%"
## [273] "37.60%" "49.00%" "47.00%" "41.00%" "49.90%" "37.30%" "50.00%" "44.00%"
## [281] "37.20%" "45.00%" "46.00%" "43.00%" "35.50%" "41.50%" "45.00%" "47.00%"
## [289] "39.30%" "41.00%" "34.00%" "43.00%" "54.40%" "44.00%" "38.00%" "41.00%"
## [297] "37.40%" "43.80%" "47.00%" "47.10%" "38.50%" "41.90%" "50.00%" "51.00%"
## [305] "42.00%" "49.00%" "41.00%" "53.00%" "44.00%" "38.60%" "46.00%" "48.00%"
## [313] "44.00%" "42.00%" "55.60%" "34.70%" "41.60%" "48.00%" "37.20%" "45.20%"
## [321] "43.00%" "44.00%" "41.00%" "42.00%" "37.80%" "46.00%" "37.30%" "42.80%"
## [329] "36.00%" "41.00%" "44.00%" "37.00%" "40.00%" "48.00%" "39.60%" "33.00%"
## [337] "38.00%" "43.00%" "48.00%" "42.00%" "45.00%" "31.00%" "35.00%" "44.00%"
## [345] "48.00%" "39.00%" "41.00%" "44.00%" "50.00%" "45.00%" "38.00%" "37.00%"
## [353] "40.00%" "44.00%" "40.00%" "32.00%" "39.00%" "41.00%" "44.00%" "44.00%"
## [361] "38.00%" "48.00%" "43.00%" "46.00%" "46.00%" "51.00%" "46.00%" "51.00%"
## [369] "37.00%" "29.00%" "33.00%" "42.00%" "45.90%" "48.70%" "40.00%" "44.00%"
## [377] "37.00%" "42.00%" "41.00%" "45.00%" "38.00%" "45.00%" "43.00%" "42.00%"
## [385] "41.00%" "32.00%" "37.00%" "42.00%" "45.00%" "40.00%" "48.00%" "41.00%"
## [393] "49.00%" "41.00%" "37.00%" "40.00%" "41.00%" "41.00%" "36.00%" "42.00%"
## [401] "46.00%" "47.00%" "31.00%" "29.00%" "48.00%" "41.00%" "38.00%" "41.00%"
## [409] "40.00%" "40.00%" "40.00%" "48.00%" "40.00%" "40.00%" "47.00%" "39.00%"
## [417] "42.00%" "43.00%" "37.00%" "40.00%" "43.00%" "42.00%" "37.00%" "40.00%"
## [425] "44.00%" "45.00%" "45.00%" "38.00%" "40.00%" "37.00%" "40.00%" "39.00%"
## [433] "48.00%" "38.00%" "46.00%" "37.00%" "37.00%" "40.00%" "37.00%" "48.00%"
## [441] "42.00%" "44.00%" "42.00%" "36.00%" "38.00%" "41.00%" "35.00%" "37.00%"
## [449] "42.10%" "37.00%" "36.00%" "38.00%" "37.00%" "42.00%" "44.00%" "47.00%"
## [457] "30.00%" "34.00%" "42.00%" "37.00%" "46.00%" "37.00%" "35.00%" "37.00%"
## [465] "37.00%" "40.00%" "41.00%" "38.00%" "47.00%" "40.00%" "39.00%" "36.00%"
## [473] "37.00%" "41.40%" "40.00%" "38.00%" "38.00%" "42.00%" "37.00%" "42.00%"
## [481] "43.00%" "39.00%" "31.00%" "46.40%" "41.00%" "42.70%" "42.00%" "31.00%"
## [489] "46.00%" "38.00%" "42.00%" "42.00%" "43.00%" "44.00%" "48.00%" "47.00%"
# Scatter plot of each party's share per poll against the poll start date.
# Start_date is a character column, so plotting it directly gave a discrete
# axis ordered alphabetically rather than chronologically; it is parsed to
# Date here, inside aes(), so `result` itself is left unchanged.
# NOTE(review): assumes Start_date is formatted m/d/yy (e.g. "1/19/23") --
# confirm against the CSV and adjust `format` if it differs; unparseable
# dates become NA and ggplot2 drops them with a warning.
ggplot(result, aes(x = as.Date(Start_date, format = "%m/%d/%y"))) +
  # Colour is mapped inside aes() so the plot gets a legend for the parties.
  geom_point(aes(y = as.numeric(Dem), colour = "Dem")) +
  geom_point(aes(y = as.numeric(Rep), colour = "Rep")) +
  scale_colour_manual(name = "Party", values = c(Dem = "blue", Rep = "red")) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  scale_x_date(guide = guide_axis(check.overlap = TRUE)) +
  labs(x = "Poll start date", y = "Share of respondents")

class(result$Dem)
## [1] "numeric"
# Dem and Rep were rescaled to 0-1 fractions before plotting, so the mean is
# multiplied back to percentage points before rounding. Rounding the raw
# fraction (about 0.4) always printed 0.
round(mean(result$Dem, na.rm = TRUE) * 100)
round(mean(result$Rep, na.rm = TRUE) * 100)

Conclusion

According to the polls, the mean support for Democrats is 43% and for Republicans is 42%. The difference is 1 percentage point, which suggests that slightly more respondents want Democrats in Congress.