# Set the default knitr chunk option echo = TRUE for the chunks in this document.
knitr::opts_chunk$set(echo = TRUE)

require(readr)
## Loading required package: readr
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
require(RCurl)
## Loading required package: RCurl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.3     ✓ stringr 1.4.0
## ✓ tidyr   1.1.3     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::complete() masks RCurl::complete()
## x dplyr::filter()   masks stats::filter()
## x dplyr::lag()      masks stats::lag()
# Import data using the GitHub raw-data URL of the original data file. The file contains results from various surveys of public perception of how effectively two US Presidents handled the Covid pandemic, reported as approval and disapproval ratings (%).

# Two GitHub URLs: the first was used initially and lacks the url column; the second is the original file with the full data set, which is the one finally used.
#https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls_no_url.csv
#https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls.csv

# Location of the raw CSV on GitHub (full data set, including the url column).
urlfile <- "https://raw.githubusercontent.com/schmalmr/607-HW-1/main/covid_approval_polls.csv"

# Open a connection to the URL and parse the CSV with readr.
dataimport_Covid <- urlfile %>%
  url() %>%
  read_csv()
## Rows: 2809 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): pollster, sponsor, population, party, subject, text, url
## dbl  (3): sample_size, approve, disapprove
## lgl  (1): tracking
## date (2): start_date, end_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#dataimport_Covid<-data_frame(covid_approval_polls)
# Ensure the imported data is a tibble. as_tibble() replaces the deprecated
# data_frame() constructor, which emitted a deprecation warning on every run.
dataimport_Covid <- as_tibble(dataimport_Covid)

# Print the tibble to preview the first rows and the column types.
dataimport_Covid
## # A tibble: 2,809 × 13
##    start_date end_date   pollster   sponsor sample_size population party subject
##    <date>     <date>     <chr>      <chr>         <dbl> <chr>      <chr> <chr>  
##  1 2020-02-02 2020-02-04 YouGov     Econom…        1500 a          all   Trump  
##  2 2020-02-02 2020-02-04 YouGov     Econom…         376 a          R     Trump  
##  3 2020-02-02 2020-02-04 YouGov     Econom…         523 a          D     Trump  
##  4 2020-02-02 2020-02-04 YouGov     Econom…         599 a          I     Trump  
##  5 2020-02-07 2020-02-09 Morning C… <NA>           2200 a          all   Trump  
##  6 2020-02-07 2020-02-09 Morning C… <NA>            684 a          R     Trump  
##  7 2020-02-07 2020-02-09 Morning C… <NA>            817 a          D     Trump  
##  8 2020-02-07 2020-02-09 Morning C… <NA>            700 a          I     Trump  
##  9 2020-02-07 2020-02-09 Morning C… Politi…        1996 rv         all   Trump  
## 10 2020-02-07 2020-02-09 Morning C… Politi…         700 rv         R     Trump  
## # … with 2,799 more rows, and 5 more variables: tracking <lgl>, text <chr>,
## #   approve <dbl>, disapprove <dbl>, url <chr>
# Sanity-check the import: open the data viewer, then confirm the table's
# dimensions and column names.
view(dataimport_Covid)
dim(dataimport_Covid)
## [1] 2809   13
names(dataimport_Covid)
##  [1] "start_date"  "end_date"    "pollster"    "sponsor"     "sample_size"
##  [6] "population"  "party"       "subject"     "tracking"    "text"       
## [11] "approve"     "disapprove"  "url"
# Keep only the columns needed for the analysis from dataimport_Covid, in the
# order they should appear in the working table.
dataimport_Covid2 <- dataimport_Covid %>%
  select(
    subject, party, sample_size, population,
    pollster, approve, disapprove, end_date
  )

# Confirm the dimensions of the reduced table.
dim(dataimport_Covid2)
## [1] 2809    8
#view(dataimport_Covid2)

# Rename the `subject` column to `active_president` (new_name = old_name).

library(dplyr)
dataimport_Covid2 <- dataimport_Covid2 %>%
  rename(active_president = subject)

# Replace the short codes in the population and party columns with longer,
# descriptive labels.

# population: expand the survey-population codes and store the column as a
# factor. Codes not listed below (and NA) pass through unchanged.
# NOTE(review): "a" is labelled "adult_voter" here; the source data may mean
# all adults rather than voters - confirm against the data dictionary.
dataimport_Covid2 <- dataimport_Covid2 %>%
  mutate(
    population = as.factor(recode(
      as.character(population),
      a = "adult_voter",
      rv = "registered_voter",
      lv = "likely_voter"
    ))
  )


# party: expand the party codes into descriptive labels and store the column
# as a factor. Codes not listed below (and NA) pass through unchanged.
# The Democratic label is spelled "democrat_voter" (previously misspelled
# "democract_voter", which showed up in the plot legends). The alphabetical
# level order is the same, so the shape mapping in the plots is unaffected.
# mutate() returns a tibble, so no re-wrapping with tibble() is needed.
dataimport_Covid2 <- dataimport_Covid2 %>%
  mutate(
    party = as.factor(recode(
      as.character(party),
      R = "republican_voter",
      D = "democrat_voter",
      I = "independent_voter",
      all = "all_political_parties"
    ))
  )

# Verify the transformations: column names, dimensions, and a viewer pass
# over the data to confirm the changes were applied.
names(dataimport_Covid2)
## [1] "active_president" "party"            "sample_size"      "population"      
## [5] "pollster"         "approve"          "disapprove"       "end_date"
dim(dataimport_Covid2)
## [1] 2809    8
view(dataimport_Covid2)

# Scatter plots for a first look at the data, using each survey's end_date as
# the date marker. They compare approval over time by political party and by
# the president in office.

# Plot 1: approval vs. survey end date, one point shape per party.
ggplot(
  data = dataimport_Covid2,
  mapping = aes(x = end_date, y = approve, shape = party)
) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(21, 24, 12, 4))
## Warning: Removed 3 rows containing missing values (geom_point).

# Plot 2: same axes, with point shape by party and fill colour by president.
# Fillable shapes (21-25) are used so the fill aesthetic is visible; the fill
# legend is drawn with shape 21 so its keys show the fill colour.
ggplot(
  data = dataimport_Covid2,
  mapping = aes(
    x = end_date,
    y = approve,
    shape = party,
    fill = active_president
  )
) +
  geom_point(size = 2) +
  scale_shape_manual(values = c(21, 24, 23, 25)) +
  scale_fill_manual(
    values = c("blue", "red"),
    guide = guide_legend(override.aes = list(shape = 21))
  )
## Warning: Removed 3 rows containing missing values (geom_point).

Conclusions / Findings or Recommendations:

This is a preliminary screening analysis; more could be done to improve the graphics and to evaluate the discrete series.
The initial polling data file was used - further analysis can be done using the adjusted polling data file.

  1. In general, long-term approval of the Presidents' actions to address Covid has improved over time, but there are some indications that this approval level is drifting. Overall, Biden’s approval rating has been higher; this is likely related to his push for rapid vaccination as he took office (likely also correlated with the availability of the vaccine, which we may find Trump to have enabled - more information is needed to confirm). For both Presidents, Trump and Biden, the approval rating appears to have been highest at the start of their Covid program announcements, and both have seen some decline over time.
  2. The approval ratings of the Presidents' actions seem to be highly correlated with which party (Republican or Democrat) held the Presidency. Startlingly so: Republican approval dropped from the ~70% range to the ~30% range as Trump left office and Biden took office. The opposite was true for Democrats, whose approval jumped from ~15% to almost 90% when Trump left office and Biden took office.
  3. Independents maintained an overall higher rating at all times than the lowest party grouping, and their rating tended to increase as Biden took office.
  4. All political parties, when grouped together, show an uptick in approval as Biden took office (likely also as he announced his vaccine program - more data is needed to correlate this effect).
  5. We can see the decline in approval as we moved into Aug 2021, which is likely related to the increasing impact of additional Covid variants (Delta, etc.) and the time it is taking to return to “fully normal”. Again, there is no data here to support this specific conclusion; it is a hypothesis based on what is occurring in the US according to recent news coverage.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.