subject: this column marks whose handling of COVID-19 the approval poll is about
party: party of respondents
startdate: start date of poll
enddate: end date of poll
pollster: organization that conducted the poll
sponsor: organization that sponsored the poll
samplesize: size of polling sample
population:
tracking: True if the poll is a tracking poll, meaning that the pollster is releasing data with overlapping samples
text: text of the question the pollster asked
url: link to the poll
modeldate: date of model run
grade: grade given to the pollster(website below)
weight: weight given to each poll in the model
multiversions: __*__ denotes that multiple versions of a poll in the raw data file were combined
# Load the datasets
#theURL <- "https://raw.githubusercontent.com/fivethirtyeight/covid-19-polls/master/covid_approval_polls.csv"
#theURL2 <- "https://raw.githubusercontent.com/letisalba/Data607-Assignment-Week1/main/covid_approval_polls_adjusted.csv"
#approvaldf <- read.csv(file = theURL , header = TRUE , sep = ",", na = ".")
#approval_adjusted <- read.csv(file = theURL2, header = TRUE, sep = ",", na = ".")
#head(approvaldf)
#head(approval_adjusted)
# Load the datasets from my GitHub
theURL <- "https://raw.githubusercontent.com/letisalba/Data607-Assignment-Week1/main/covid_approval_polls.csv"
theURL2 <- "https://raw.githubusercontent.com/letisalba/Data607-Assignment-Week1/main/covid_approval_polls_adjusted.csv"
approval <- read.csv(file = theURL , header = TRUE , sep = ",", na = ".")
approval_adjusted <- read.csv(file = theURL2, header = TRUE, sep = ",", na = ".")
head(approval)
## start_date end_date pollster sponsor sample_size population party
## 1 2020-02-02 2020-02-04 YouGov Economist 1500 a all
## 2 2020-02-02 2020-02-04 YouGov Economist 376 a R
## 3 2020-02-02 2020-02-04 YouGov Economist 523 a D
## 4 2020-02-02 2020-02-04 YouGov Economist 599 a I
## 5 2020-02-07 2020-02-09 Morning Consult 2200 a all
## 6 2020-02-07 2020-02-09 Morning Consult 684 a R
## subject tracking
## 1 Trump FALSE
## 2 Trump FALSE
## 3 Trump FALSE
## 4 Trump FALSE
## 5 Trump FALSE
## 6 Trump FALSE
## text
## 1 Do you approve or disapprove of Donald Trump’s handling of the coronavirus outbreak?
## 2 Do you approve or disapprove of Donald Trump’s handling of the coronavirus outbreak?
## 3 Do you approve or disapprove of Donald Trump’s handling of the coronavirus outbreak?
## 4 Do you approve or disapprove of Donald Trump’s handling of the coronavirus outbreak?
## 5 Do you approve or disapprove of the job each of the following is doing in handling the spread of coronavirus in the United States? President Donald Trump
## 6 Do you approve or disapprove of the job each of the following is doing in handling the spread of coronavirus in the United States? President Donald Trump
## approve disapprove
## 1 42 29
## 2 75 6
## 3 21 51
## 4 39 25
## 5 57 22
## 6 88 4
## url
## 1 https://d25d2506sfb94s.cloudfront.net/cumulus_uploads/document/73jqd6u5mv/econTabReport.pdf
## 2 https://d25d2506sfb94s.cloudfront.net/cumulus_uploads/document/73jqd6u5mv/econTabReport.pdf
## 3 https://d25d2506sfb94s.cloudfront.net/cumulus_uploads/document/73jqd6u5mv/econTabReport.pdf
## 4 https://d25d2506sfb94s.cloudfront.net/cumulus_uploads/document/73jqd6u5mv/econTabReport.pdf
## 5 https://morningconsult.com/wp-content/uploads/2020/02/200214_crosstabs_CORONAVIRUS_Adults_v4_JB.pdf
## 6 https://morningconsult.com/wp-content/uploads/2020/02/200214_crosstabs_CORONAVIRUS_Adults_v4_JB.pdf
head(approval_adjusted)
## subject modeldate party startdate enddate pollster grade
## 1 Biden 8/25/2021 D 1/24/2021 1/26/2021 YouGov B+
## 2 Biden 8/25/2021 D 1/28/2021 2/1/2021 Quinnipiac University A-
## 3 Biden 8/25/2021 D 1/29/2021 2/1/2021 Morning Consult B
## 4 Biden 8/25/2021 D 1/31/2021 2/2/2021 YouGov B+
## 5 Biden 8/25/2021 D 2/2/2021 2/2/2021 Data for Progress B
## 6 Biden 8/25/2021 D 2/5/2021 2/7/2021 YouGov B+
## samplesize population weight influence multiversions tracking approve
## 1 477.00 a 0.6285238 0 NA 84.00
## 2 333.25 a 0.6317152 0 NA 93.00
## 3 808.00 rv 0.8337467 0 NA 89.00
## 4 484.00 a 0.5493243 0 NA 88.00
## 5 564.00 a 0.8883977 0 NA 89.22
## 6 336.00 a 0.3521874 0 NA 88.00
## disapprove approve_adjusted disapprove_adjusted timestamp
## 1 3.00 86.91186 2.329323 10:20:07 25 Aug 2021
## 2 5.00 92.32067 5.472705 10:20:07 25 Aug 2021
## 3 7.00 90.42590 6.330154 10:20:07 25 Aug 2021
## 4 7.00 90.91186 6.329323 10:20:07 25 Aug 2021
## 5 7.14 89.62926 6.669734 10:20:07 25 Aug 2021
## 6 8.00 90.91186 7.329323 10:20:07 25 Aug 2021
## url
## 1 https://docs.cdn.yougov.com/ld46rgtdlz/econTabReport.pdf
## 2 https://poll.qu.edu/national/release-detail?ReleaseID=3688
## 3 https://assets.morningconsult.com/wp-uploads/2021/02/03074817/2101110_crosstabs_POLITICO_RVs_v1.pdf
## 4 https://docs.cdn.yougov.com/460mactkmh/econTabReport.pdf
## 5 https://docs.google.com/spreadsheets/d/1dmzc8DpmtpppErDUVQWbqO9WofnPnccC4fQz2vEWDww/edit#gid=0
## 6 https://big.assets.huffingtonpost.com/athena/files/2021/02/11/6025c47bc5b6741597e0e0f2.pdf
# Print Summary for both datasets
summary(approval)
## start_date end_date pollster sponsor
## Length:2809 Length:2809 Length:2809 Length:2809
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## sample_size population party subject
## Min. : 55 Length:2809 Length:2809 Length:2809
## 1st Qu.: 389 Class :character Class :character Class :character
## Median : 640 Mode :character Mode :character Mode :character
## Mean : 2379
## 3rd Qu.: 1226
## Max. :325970
## NA's :22
## tracking text approve disapprove
## Mode :logical Length:2809 Min. : 1.00 Min. : 1.00
## FALSE:2559 Class :character 1st Qu.:30.00 1st Qu.:28.00
## TRUE :242 Mode :character Median :42.00 Median :53.00
## NA's :8 Mean :46.42 Mean :48.48
## 3rd Qu.:66.00 3rd Qu.:63.00
## Max. :98.00 Max. :98.00
## NA's :3 NA's :15
## url
## Length:2809
## Class :character
## Mode :character
##
##
##
##
summary(approval_adjusted)
## subject modeldate party startdate
## Length:2711 Length:2711 Length:2711 Length:2711
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## enddate pollster grade samplesize
## Length:2711 Length:2711 Length:2711 Min. : 36.8
## Class :character Class :character Class :character 1st Qu.: 363.1
## Mode :character Mode :character Mode :character Median : 600.0
## Mean : 2384.5
## 3rd Qu.: 1175.5
## Max. :325970.0
##
## population weight influence multiversions
## Length:2711 Min. :0.02355 Min. :0.00000 Length:2711
## Class :character 1st Qu.:0.32528 1st Qu.:0.00000 Class :character
## Mode :character Median :0.45790 Median :0.00000 Mode :character
## Mean :0.57263 Mean :0.02789
## 3rd Qu.:0.71632 3rd Qu.:0.00000
## Max. :4.75767 Max. :2.02269
##
## tracking approve disapprove approve_adjusted
## Mode:logical Min. : 1.00 Min. : 1.00 Min. : 1.209
## TRUE:242 1st Qu.:30.00 1st Qu.:28.00 1st Qu.:31.668
## NA's:2469 Median :42.00 Median :52.00 Median :41.882
## Mean :46.57 Mean :48.34 Mean :46.758
## 3rd Qu.:67.00 3rd Qu.:63.00 3rd Qu.:64.533
## Max. :98.00 Max. :98.00 Max. :96.426
## NA's :3 NA's :15 NA's :3
## disapprove_adjusted timestamp url
## Min. : 1.646 Length:2711 Length:2711
## 1st Qu.:28.977 Class :character Class :character
## Median :53.305 Mode :character Mode :character
## Mean :48.816
## 3rd Qu.:62.700
## Max. :96.001
## NA's :15
# Get column names for both data sets
colnames(approval, do.NULL = TRUE, prefix = "col")
## [1] "start_date" "end_date" "pollster" "sponsor" "sample_size"
## [6] "population" "party" "subject" "tracking" "text"
## [11] "approve" "disapprove" "url"
colnames(approval_adjusted, do.NULL = TRUE, prefix = "col")
## [1] "subject" "modeldate" "party"
## [4] "startdate" "enddate" "pollster"
## [7] "grade" "samplesize" "population"
## [10] "weight" "influence" "multiversions"
## [13] "tracking" "approve" "disapprove"
## [16] "approve_adjusted" "disapprove_adjusted" "timestamp"
## [19] "url"
# Print Mean and Median for selected columns of both data sets
means <- sapply(approval[, c("approve", "disapprove")], mean)
medians <- sapply(approval[, c("approve", "disapprove")], median)
means_medianDF <- data.frame(means, medians)
means_medianDF
## means medians
## approve NA NA
## disapprove NA NA
means2 <- sapply(approval_adjusted[, c("weight", "influence", "tracking", "approve_adjusted", "disapprove_adjusted")], mean)
medians2 <- sapply(approval_adjusted[, c("weight", "influence", "tracking", "approve_adjusted", "disapprove_adjusted")], median)
means_medianDF2 <- data.frame(means2, medians2)
means_medianDF2
## means2 medians2
## weight 0.57262824 0.4578976
## influence 0.02789216 0.0000000
## tracking NA NA
## approve_adjusted NA NA
## disapprove_adjusted NA NA
# Number of missing values in both data sets
colSums(is.na(approval))
## start_date end_date pollster sponsor sample_size population
## 0 0 0 0 22 0
## party subject tracking text approve disapprove
## 0 0 8 0 3 15
## url
## 0
colSums(is.na(approval_adjusted))
## subject modeldate party startdate
## 0 0 0 0
## enddate pollster grade samplesize
## 0 0 0 0
## population weight influence multiversions
## 0 0 0 0
## tracking approve disapprove approve_adjusted
## 2469 3 15 3
## disapprove_adjusted timestamp url
## 15 0 0
# Create a new data frame with a subset of columns and rows for both data sets
approval_subset <- approval[, c("start_date", "end_date", "pollster", "sponsor", "sample_size", "population", "party", "approve", "disapprove")]
head(approval_subset)
## start_date end_date pollster sponsor sample_size population party
## 1 2020-02-02 2020-02-04 YouGov Economist 1500 a all
## 2 2020-02-02 2020-02-04 YouGov Economist 376 a R
## 3 2020-02-02 2020-02-04 YouGov Economist 523 a D
## 4 2020-02-02 2020-02-04 YouGov Economist 599 a I
## 5 2020-02-07 2020-02-09 Morning Consult 2200 a all
## 6 2020-02-07 2020-02-09 Morning Consult 684 a R
## approve disapprove
## 1 42 29
## 2 75 6
## 3 21 51
## 4 39 25
## 5 57 22
## 6 88 4
approval_adjusted_subset <- approval_adjusted[, c("subject", "party", "grade", "samplesize", "population", "weight", "influence", "approve_adjusted", "disapprove_adjusted")]
head(approval_adjusted_subset)
## subject party grade samplesize population weight influence
## 1 Biden D B+ 477.00 a 0.6285238 0
## 2 Biden D A- 333.25 a 0.6317152 0
## 3 Biden D B 808.00 rv 0.8337467 0
## 4 Biden D B+ 484.00 a 0.5493243 0
## 5 Biden D B 564.00 a 0.8883977 0
## 6 Biden D B+ 336.00 a 0.3521874 0
## approve_adjusted disapprove_adjusted
## 1 86.91186 2.329323
## 2 92.32067 5.472705
## 3 90.42590 6.330154
## 4 90.91186 6.329323
## 5 89.62926 6.669734
## 6 90.91186 7.329323
# Create new column names for both data sets to create a new data frame
colnames(approval_subset) <- c("Start Date", "End Date", "Pollster", "Sponsor", "Sample Size", "Population", "Party", "Approve", "Disapprove")
colnames(approval_subset)
## [1] "Start Date" "End Date" "Pollster" "Sponsor" "Sample Size"
## [6] "Population" "Party" "Approve" "Disapprove"
colnames(approval_adjusted_subset) <- c("Subject", "Party_subset", "Grade", "Sample_Size", "Population_subset", "Weight", "Influence", "Approve_Adjusted", "Disapprove_Adjusted" )
colnames(approval_adjusted_subset)
## [1] "Subject" "Party_subset" "Grade"
## [4] "Sample_Size" "Population_subset" "Weight"
## [7] "Influence" "Approve_Adjusted" "Disapprove_Adjusted"
# Print subset table to see new changes
head(approval_subset)
## Start Date End Date Pollster Sponsor Sample Size Population Party
## 1 2020-02-02 2020-02-04 YouGov Economist 1500 a all
## 2 2020-02-02 2020-02-04 YouGov Economist 376 a R
## 3 2020-02-02 2020-02-04 YouGov Economist 523 a D
## 4 2020-02-02 2020-02-04 YouGov Economist 599 a I
## 5 2020-02-07 2020-02-09 Morning Consult 2200 a all
## 6 2020-02-07 2020-02-09 Morning Consult 684 a R
## Approve Disapprove
## 1 42 29
## 2 75 6
## 3 21 51
## 4 39 25
## 5 57 22
## 6 88 4
head(approval_adjusted_subset)
## Subject Party_subset Grade Sample_Size Population_subset Weight Influence
## 1 Biden D B+ 477.00 a 0.6285238 0
## 2 Biden D A- 333.25 a 0.6317152 0
## 3 Biden D B 808.00 rv 0.8337467 0
## 4 Biden D B+ 484.00 a 0.5493243 0
## 5 Biden D B 564.00 a 0.8883977 0
## 6 Biden D B+ 336.00 a 0.3521874 0
## Approve_Adjusted Disapprove_Adjusted
## 1 86.91186 2.329323
## 2 92.32067 5.472705
## 3 90.42590 6.330154
## 4 90.91186 6.329323
## 5 89.62926 6.669734
## 6 90.91186 7.329323
# Number of missing values in both data sets subsets
colSums(is.na(approval_subset))
## Start Date End Date Pollster Sponsor Sample Size Population
## 0 0 0 0 22 0
## Party Approve Disapprove
## 0 3 15
colSums(is.na(approval_adjusted_subset))
## Subject Party_subset Grade Sample_Size
## 0 0 0 0
## Population_subset Weight Influence Approve_Adjusted
## 0 0 0 3
## Disapprove_Adjusted
## 15
# Histogram for approval ratings for COVID-19 presidential response
hist(approval_subset$Approve, main = "Approval Ratings for Presidents' Response to COVID-19", xlab = "Approve")
# Histogram for adjusted approval ratings for COVID-19 presidential response
hist(approval_adjusted_subset$Approve_Adjusted, main = "Adjusted Approval Ratings for Presidents' Response to COVID-19", xlab = "Approve Adjusted")
# Histogram for disapproved ratings to COVID-19 presidental response
hist(approval_subset$Disapprove, main = "Disapproval Ratings for Presidents' Response to COVID-19", xlab = "Disapprove")
# Histogram for adjusted disapproval ratings to COVID-19 presidential response
hist(approval_adjusted_subset$Disapprove_Adjusted, main = "Adjusted Disapproval Ratings for Presidents' Response to COVID-19", xlab = "Disapprove Adjusted")
# Grouped Bar graph comparing population and party from first data set
counts <- table(approval_subset$Population, approval_subset$Party)
barplot(counts, main="Population and Party Comparisson",
xlab="Party", col=c("yellow", "red", "blue", "green"), legend = rownames(counts), beside=TRUE)
# Grouped Bar graph comparing adjusted population and party
counts <- table(approval_adjusted_subset$Population_subset, approval_adjusted_subset$Party_subset)
barplot(counts, main="Adjusted Population and Party Comparisson",
xlab="Party Subset", col=c("yellow", "red", "blue", "green"), legend = rownames(counts), beside=TRUE)
# Grouped Bar graph comparing adjusted grade and party
counts <- table(approval_adjusted_subset$Grade, approval_adjusted_subset$Party_subset)
barplot(counts, main="Adjusted Grade and Party Comparisson",
xlab="Party Subset", col=c("yellow", "red", "blue", "green", "orange", "pink", "purple", "lightblue", "gray", "magenta", "maroon", "violet"), legend = rownames(counts), beside=TRUE)