# Set the knitr chunk option echo = TRUE for the whole document.
knitr::opts_chunk$set(echo = TRUE)

# Attach packages with library() rather than require(): library() stops with
# an error when a package is missing, whereas require() only returns FALSE and
# lets the script fail later at the first call into the missing package.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RCurl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.3 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::complete() masks RCurl::complete()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Import the polling data from the raw GitHub URL of the original data file.
# The data covers various surveys of the public's perception of how effectively
# two US Presidents handled the Covid pandemic, with approval and disapproval
# ratings (%).
# Two GitHub URLs exist: the first was used initially (file without URL); the
# second, the original full data set, is the one finally used below.
# https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls_no_url.csv
# https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls.csv
urlfile <- "https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls.csv"

# read_csv() accepts an http(s) URL directly, so no url() connection is needed.
dataimport_Covid <- read_csv(urlfile)
## Rows: 2809 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): pollster, sponsor, population, party, subject, text, url
## dbl (3): sample_size, approve, disapprove
## lgl (1): tracking
## date (2): start_date, end_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# as_tibble() replaces the deprecated data_frame() call (deprecated since
# tibble 1.1.0); the result of read_csv() is already a tibble, so this only
# makes the intended class explicit.
dataimport_Covid <- as_tibble(dataimport_Covid)
dataimport_Covid
## # A tibble: 2,809 × 13
## start_date end_date pollster sponsor sample_size population party subject
## <date> <date> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 2020-02-02 2020-02-04 YouGov Econom… 1500 a all Trump
## 2 2020-02-02 2020-02-04 YouGov Econom… 376 a R Trump
## 3 2020-02-02 2020-02-04 YouGov Econom… 523 a D Trump
## 4 2020-02-02 2020-02-04 YouGov Econom… 599 a I Trump
## 5 2020-02-07 2020-02-09 Morning C… <NA> 2200 a all Trump
## 6 2020-02-07 2020-02-09 Morning C… <NA> 684 a R Trump
## 7 2020-02-07 2020-02-09 Morning C… <NA> 817 a D Trump
## 8 2020-02-07 2020-02-09 Morning C… <NA> 700 a I Trump
## 9 2020-02-07 2020-02-09 Morning C… Politi… 1996 rv all Trump
## 10 2020-02-07 2020-02-09 Morning C… Politi… 700 rv R Trump
## # … with 2,799 more rows, and 5 more variables: tracking <lgl>, text <chr>,
## # approve <dbl>, disapprove <dbl>, url <chr>
# Sanity-check the import: open the data viewer, then confirm the dimensions
# and the column names of the imported table.
view(dataimport_Covid)
dim(dataimport_Covid)
## [1] 2809 13
names(dataimport_Covid)
## [1] "start_date" "end_date" "pollster" "sponsor" "sample_size"
## [6] "population" "party" "subject" "tracking" "text"
## [11] "approve" "disapprove" "url"

# Keep only the columns used in the analysis, in the order listed here.
dataimport_Covid2 <- select(
  dataimport_Covid,
  subject, party, sample_size, population,
  pollster, approve, disapprove, end_date
)

# Confirm the dimensions of the reduced table (optionally view it as well).
dim(dataimport_Covid2)
## [1] 2809 8
# view(dataimport_Covid2)
# Rename the `subject` column to `active_president`.
dataimport_Covid2 <- rename(dataimport_Covid2, active_president = subject)

# Replace the terse codes in the `population` and `party` columns with longer,
# descriptive labels, and store both columns as factors. recode() leaves any
# value that has no listed replacement (including NA) unchanged.
# NOTE(review): "a" may denote all adults rather than voters specifically;
# confirm against the data source's codebook before relying on "adult_voter".
dataimport_Covid2 <- dataimport_Covid2 %>%
  mutate(
    population = as.factor(recode(
      as.character(population),
      a = "adult_voter",
      rv = "registered_voter",
      lv = "likely_voter"
    )),
    party = as.factor(recode(
      as.character(party),
      R = "republican_voter",
      D = "democrat_voter", # previously misspelled "democract_voter"
      I = "independent_voter",
      all = "all_political_parties"
    ))
  )
# Verify that the rename and relabelling took effect: list the column names,
# confirm the dimensions, and open the data viewer on the updated table.
names(dataimport_Covid2)
## [1] "active_president" "party" "sample_size" "population"
## [5] "pollster" "approve" "disapprove" "end_date"
dim(dataimport_Covid2)
## [1] 2809 8
view(dataimport_Covid2)
# Scatter plots for a first look at the data, using each survey's end_date as
# the date marker. They compare approval over time by political party and by
# the president in office.

# Plot 1: approval vs. end date, with point shape encoding the party.
ggplot(
  data = dataimport_Covid2,
  mapping = aes(x = end_date, y = approve, shape = party)
) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(21, 24, 12, 4))
## Warning: Removed 3 rows containing missing values (geom_point).

# Plot 2: same axes, with shapes 21-25 (which take a fill colour) so that the
# fill can encode the active president.
ggplot(
  data = dataimport_Covid2,
  mapping = aes(
    x = end_date,
    y = approve,
    shape = party,
    fill = active_president
  )
) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(21, 24, 23, 25)) +
  scale_fill_manual(
    values = c("blue", "red"),
    # Draw the fill legend keys with shape 21.
    guide = guide_legend(override.aes = list(shape = 21))
  )
## Warning: Removed 3 rows containing missing values (geom_point).
Conclusions, findings, and recommendations:
This is a preliminary screening analysis of the data; more could be done to improve the graphics and to evaluate the discrete series.
This analysis used the initial polling data file; further analysis can be done using the adjusted polling data file.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.