# Switch to the author's project folder only when it exists, so the script
# does not abort with "cannot change working directory" on other machines.
# NOTE(review): the data sets visible in this script are read from URLs, so
# the working directory may not be needed at all - confirm and drop if unused.
project_dir <- "C:/Users/malia/OneDrive/Desktop/MSDS DATA 607"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(foreign)
library(descr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(expss)
##
## Attaching package: 'expss'
## The following objects are masked from 'package:magrittr':
##
## and, equals, or
## The following objects are masked from 'package:stringr':
##
## fixed, regex
## The following objects are masked from 'package:dplyr':
##
## between, compute, contains, first, last, na_if, recode, vars
## The following objects are masked from 'package:purrr':
##
## keep, modify, modify_if, transpose, when
## The following objects are masked from 'package:tidyr':
##
## contains, nest
## The following object is masked from 'package:ggplot2':
##
## vars
library(readr)
library(dplyr)
library(ggplot2)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.3
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 4.0.3
## Registered S3 methods overwritten by 'Hmisc':
## method from
## [.labelled expss
## print.labelled expss
## as.data.frame.labelled expss
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
Loading the data frames.
# Download the adjusted COVID-19 concern polls from the FiveThirtyEight
# GitHub repository into a tibble; readr guesses the column types and
# reports the specification when the call runs.
COVID_CONCERN <- read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/covid-19-polls/master/covid_concern_polls_adjusted.csv"
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## subject = col_character(),
## modeldate = col_character(),
## party = col_character(),
## startdate = col_character(),
## enddate = col_character(),
## pollster = col_character(),
## grade = col_character(),
## population = col_character(),
## multiversions = col_logical(),
## tracking = col_logical(),
## timestamp = col_character(),
## url = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the concern polls.
COVID_CONCERN %>% head()
## # A tibble: 6 x 23
## subject modeldate party startdate enddate pollster grade samplesize population
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 concer~ 1/20/2021 all 1/27/2020 1/29/2~ Morning~ B/C 2202 a
## 2 concer~ 1/20/2021 all 1/31/2020 2/2/20~ Morning~ B/C 2202 a
## 3 concer~ 1/20/2021 all 2/7/2020 2/9/20~ Morning~ B/C 2200 a
## 4 concer~ 1/20/2021 all 2/13/2020 2/18/2~ Kaiser ~ <NA> 1207 a
## 5 concer~ 1/20/2021 all 2/24/2020 2/26/2~ Morning~ B/C 2200 a
## 6 concer~ 1/20/2021 all 2/27/2020 2/27/2~ SurveyM~ <NA> 1051 a
## # ... with 14 more variables: weight <dbl>, influence <dbl>,
## # multiversions <lgl>, tracking <lgl>, very <dbl>, somewhat <dbl>,
## # not_very <dbl>, not_at_all <dbl>, very_adjusted <dbl>,
## # somewhat_adjusted <dbl>, not_very_adjusted <dbl>,
## # not_at_all_adjusted <dbl>, timestamp <chr>, url <chr>
# Download the COVID-19 approval polls from the same FiveThirtyEight
# repository into a tibble.
covid_approval_polls_n <- read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/covid-19-polls/master/covid_approval_polls.csv"
)
## Parsed with column specification:
## cols(
## start_date = col_date(format = ""),
## end_date = col_date(format = ""),
## pollster = col_character(),
## sponsor = col_character(),
## sample_size = col_double(),
## population = col_character(),
## party = col_character(),
## subject = col_character(),
## tracking = col_logical(),
## text = col_character(),
## approve = col_double(),
## disapprove = col_double(),
## url = col_character()
## )
Subsetting the data frames for convenience of analysis.
# Keep only the subject, party, date columns and the four raw
# concern-level columns, then preview the result.
subset_COVID_CONCERN <- COVID_CONCERN[, c(
  "subject", "party", "startdate", "enddate",
  "very", "somewhat", "not_very", "not_at_all"
)]
subset_COVID_CONCERN %>% head()
## # A tibble: 6 x 8
## subject party startdate enddate very somewhat not_very not_at_all
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 concern-economy all 1/27/2020 1/29/2020 19 33 23 11
## 2 concern-economy all 1/31/2020 2/2/2020 26 32 25 7
## 3 concern-economy all 2/7/2020 2/9/2020 23 32 24 9
## 4 concern-economy all 2/13/2020 2/18/2020 22 35 28 15
## 5 concern-economy all 2/24/2020 2/26/2020 32 37 18 6
## 6 concern-economy all 2/27/2020 2/27/2020 41 37 14 5
# Build a second copy of the concern-poll columns under the name
# subset_COVID_APPROVAL.
# NOTE(review): despite its name, this object is derived from COVID_CONCERN,
# not from the approval polls - confirm whether it is needed at all.
subset_COVID_APPROVAL <- subset(
  COVID_CONCERN,
  select = c(
    subject, party, startdate, enddate,
    very, somewhat, not_very, not_at_all
  )
)
# Preview the object created by the statement above.
head(subset_COVID_APPROVAL)
## # A tibble: 6 x 8
## subject party startdate enddate very somewhat not_very not_at_all
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 concern-economy all 1/27/2020 1/29/2020 19 33 23 11
## 2 concern-economy all 1/31/2020 2/2/2020 26 32 25 7
## 3 concern-economy all 2/7/2020 2/9/2020 23 32 24 9
## 4 concern-economy all 2/13/2020 2/18/2020 22 35 28 15
## 5 concern-economy all 2/24/2020 2/26/2020 32 37 18 6
## 6 concern-economy all 2/27/2020 2/27/2020 41 37 14 5
# Preview the first rows of the approval polls.
covid_approval_polls_n %>% head()
## # A tibble: 6 x 13
## start_date end_date pollster sponsor sample_size population party subject
## <date> <date> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 2020-02-02 2020-02-04 YouGov Econom~ 1500 a all Trump
## 2 2020-02-02 2020-02-04 YouGov Econom~ 376 a R Trump
## 3 2020-02-02 2020-02-04 YouGov Econom~ 523 a D Trump
## 4 2020-02-02 2020-02-04 YouGov Econom~ 599 a I Trump
## 5 2020-02-07 2020-02-09 Morning~ <NA> 2200 a all Trump
## 6 2020-02-07 2020-02-09 Morning~ <NA> 684 a R Trump
## # ... with 5 more variables: tracking <lgl>, text <chr>, approve <dbl>,
## # disapprove <dbl>, url <chr>
# Keep only the pollster, sample size, party and the approve/disapprove
# columns of the approval polls, then preview the result.
subset_COVID_APPROVAL_POLLS <- covid_approval_polls_n[, c(
  "pollster", "sample_size", "party", "approve", "disapprove"
)]
subset_COVID_APPROVAL_POLLS %>% head()
## # A tibble: 6 x 5
## pollster sample_size party approve disapprove
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 YouGov 1500 all 42 29
## 2 YouGov 376 R 75 6
## 3 YouGov 523 D 21 51
## 4 YouGov 599 I 39 25
## 5 Morning Consult 2200 all 57 22
## 6 Morning Consult 684 R 88 4
Omitting rows with missing (NA) values
# Display the rows that have no missing value in any column.
# NOTE(review): the result is only printed, not assigned, so
# subset_COVID_CONCERN itself still contains the rows with NA.
subset_COVID_CONCERN %>% na.omit()
## # A tibble: 552 x 8
## subject party startdate enddate very somewhat not_very not_at_all
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 concern-economy all 1/27/2020 1/29/2020 19 33 23 11
## 2 concern-economy all 1/31/2020 2/2/2020 26 32 25 7
## 3 concern-economy all 2/7/2020 2/9/2020 23 32 24 9
## 4 concern-economy all 2/13/2020 2/18/2020 22 35 28 15
## 5 concern-economy all 2/24/2020 2/26/2020 32 37 18 6
## 6 concern-economy all 2/27/2020 2/27/2020 41 37 14 5
## 7 concern-economy all 2/28/2020 3/1/2020 38 33 13 5
## 8 concern-economy all 3/2/2020 3/3/2020 36 36 21 6
## 9 concern-economy all 3/3/2020 3/5/2020 38 37 12 5
## 10 concern-economy all 3/2/2020 3/5/2020 34 40 19 6
## # ... with 542 more rows
The cross-tabulation shows that people are concerned about COVID-19.
# Cross-tabulate poll subject against the `very` column, with row
# percentages and a Pearson chi-squared test.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
# NOTE(review): `very` is numeric with many distinct values, so the table is
# wide and sparse and R warns that the chi-squared approximation may be
# incorrect; consider binning `very` before testing.
crosstab(
  subset_COVID_CONCERN$subject,
  subset_COVID_CONCERN$very,
  prop.r = TRUE,
  chisq = TRUE,
  dnn = c("COVID-CONCERN", "Very Concerned about COVID")
)
## Warning in chisq.test(tab, correct = FALSE, ...): Chi-squared approximation may
## be incorrect

## Cell Contents
## |-------------------------|
## | Count |
## | Row Percent |
## |-------------------------|
##
## ==============================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
## Very Concerned about COVID
## COVID 8 9 10 11 12 13 14 15 16 18 19 20 21 22 22.8 23 23.1 23.5 23.7 23.9 24 24.6 24.83 25 25.5 26 26.5 26.8 27 27.4 27.5 27.6 27.8 28 28.4 28.5 28.6 28.75 28.9 29 29.5 29.7 29.8 29.98 30 30.5 30.63 31 31.4 31.5 31.6 32 32.2 32.4 32.5 32.55 32.71 33 33.09 33.67 34 34.45 34.5 34.67 34.82 34.86 34.92 35 35.5 35.51 35.75 36 36.21 36.47 36.5 37 37.5 38 38.5 39 39.47 39.5 40 40.5 41 41.5 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 Total
## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## cncr- 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 4 0 0 0 0 0 2 0 0 0 0 2 0 2 0 3 2 9 12 3 14 9 8 7 6 9 4 7 5 6 8 7 8 6 15 5 3 4 3 4 4 1 3 1 2 1 1 199
## 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.5% 0.0% 0.0% 0.5% 0.0% 0.5% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.5% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 1.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.5% 0.0% 0.0% 0.5% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.5% 0.0% 0.0% 0.0% 2.0% 0.0% 0.0% 0.0% 0.0% 0.0% 1.0% 0.0% 0.0% 0.0% 0.0% 1.0% 0.0% 1.0% 0.0% 1.5% 1.0% 4.5% 6.0% 1.5% 7.0% 4.5% 4.0% 3.5% 3.0% 4.5% 2.0% 3.5% 2.5% 3.0% 4.0% 3.5% 4.0% 3.0% 7.5% 2.5% 1.5% 2.0% 1.5% 2.0% 2.0% 0.5% 1.5% 0.5% 1.0% 0.5% 0.5% 35.9%
## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## cncr- 1 4 3 5 7 2 4 3 5 2 3 8 6 16 1 7 1 1 1 1 14 1 1 9 1 17 2 1 6 1 2 1 1 9 1 2 1 1 1 8 1 2 1 1 12 1 1 8 1 1 1 6 1 1 2 1 1 7 1 1 8 1 4 1 2 1 1 9 2 1 1 10 1 1 6 16 4 13 4 3 1 2 4 1 6 2 7 4 1 3 5 6 2 5 4 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 356
## 0.3% 1.1% 0.8% 1.4% 2.0% 0.6% 1.1% 0.8% 1.4% 0.6% 0.8% 2.2% 1.7% 4.5% 0.3% 2.0% 0.3% 0.3% 0.3% 0.3% 3.9% 0.3% 0.3% 2.5% 0.3% 4.8% 0.6% 0.3% 1.7% 0.3% 0.6% 0.3% 0.3% 2.5% 0.3% 0.6% 0.3% 0.3% 0.3% 2.2% 0.3% 0.6% 0.3% 0.3% 3.4% 0.3% 0.3% 2.2% 0.3% 0.3% 0.3% 1.7% 0.3% 0.3% 0.6% 0.3% 0.3% 2.0% 0.3% 0.3% 2.2% 0.3% 1.1% 0.3% 0.6% 0.3% 0.3% 2.5% 0.6% 0.3% 0.3% 2.8% 0.3% 0.3% 1.7% 4.5% 1.1% 3.7% 1.1% 0.8% 0.3% 0.6% 1.1% 0.3% 1.7% 0.6% 2.0% 1.1% 0.3% 0.8% 1.4% 1.7% 0.6% 1.4% 1.1% 0.3% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.3% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 64.1%
## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Total 1 4 3 5 7 2 4 3 5 2 4 8 6 17 1 8 1 1 1 1 14 1 1 9 1 18 2 1 6 1 2 1 1 9 1 2 1 1 1 8 1 2 1 1 12 1 1 8 1 1 1 8 1 1 2 1 1 8 1 1 9 1 4 1 2 1 1 10 2 1 1 14 1 1 6 16 4 15 4 3 1 2 6 1 8 2 10 6 10 15 8 20 11 13 11 7 9 4 7 5 6 8 7 8 7 15 5 3 4 3 4 4 1 3 1 2 1 1 555
## ==============================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
##
## Statistics for All Table Factors
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 394.6496 d.f. = 117 p <2e-16
##
## Minimum expected frequency: 0.3585586
## Cells with Expected Frequency < 5: 197 of 236 (83.47458%)
Filtering data on the basis of concern level:
# "Very concerned" values for the concern-infected polls that started on
# the four selected dates. Each date must be its own string: a combined
# "7/5/2020,9/25/2020" element matches no startdate and silently drops
# both dates.
subset_COVID_CONCERN %>%
  select(subject, startdate, very) %>%
  filter(
    startdate %in% c("7/3/2020", "7/5/2020", "9/25/2020", "10/16/2020"),
    subject %in% c("concern-infected")
  )
## # A tibble: 5 x 3
## subject startdate very
## <chr> <chr> <dbl>
## 1 concern-infected 7/3/2020 24
## 2 concern-infected 7/3/2020 35
## 3 concern-infected 10/16/2020 24
## 4 concern-infected 10/16/2020 26
## 5 concern-infected 10/16/2020 49
# "Not at all concerned" values for the concern-infected polls that started
# on the four selected dates.
subset_COVID_CONCERN %>%
  filter(
    subject %in% c("concern-infected"),
    startdate %in% c("7/3/2020", "7/5/2020", "9/25/2020", "10/16/2020")
  ) %>%
  select(subject, startdate, not_at_all)
## # A tibble: 7 x 3
## subject startdate not_at_all
## <chr> <chr> <dbl>
## 1 concern-infected 7/3/2020 15
## 2 concern-infected 7/3/2020 10
## 3 concern-infected 7/5/2020 13
## 4 concern-infected 9/25/2020 16
## 5 concern-infected 10/16/2020 15
## 6 concern-infected 10/16/2020 14
## 7 concern-infected 10/16/2020 NA
# "Very concerned" values for the concern-economy polls that started on the
# two selected September dates.
subset_COVID_CONCERN %>%
  filter(
    subject %in% c("concern-economy"),
    startdate %in% c("9/7/2020", "9/28/2020")
  ) %>%
  select(subject, startdate, very)
## # A tibble: 2 x 3
## subject startdate very
## <chr> <chr> <dbl>
## 1 concern-economy 9/7/2020 62
## 2 concern-economy 9/28/2020 61
# "Not at all concerned" values for the concern-economy polls that started
# on the two selected dates.
subset_COVID_CONCERN %>%
  filter(
    subject %in% c("concern-economy"),
    startdate %in% c("9/25/2020", "10/16/2020")
  ) %>%
  select(subject, startdate, not_at_all)
## # A tibble: 1 x 3
## subject startdate not_at_all
## <chr> <chr> <dbl>
## 1 concern-economy 9/25/2020 2
Filtering data on the basis of approval and disapproval of President Trump's response to COVID-19. A particular pollster, Morning Consult, which has a bigger sample size, has been selected, and the party variable has also been filtered, to gain a microscopic understanding of the approval/disapproval data.
# Approval and disapproval figures, by party, for the polls conducted by
# Morning Consult.
subset_COVID_APPROVAL_POLLS %>%
  filter(pollster %in% c("Morning Consult")) %>%
  select(pollster, party, approve, disapprove)
## # A tibble: 331 x 4
## pollster party approve disapprove
## <chr> <chr> <dbl> <dbl>
## 1 Morning Consult all 57 22
## 2 Morning Consult R 88 4
## 3 Morning Consult D 37 37
## 4 Morning Consult I 50 22
## 5 Morning Consult all 39 35
## 6 Morning Consult R 71 8
## 7 Morning Consult D 15 60
## 8 Morning Consult I 34 33
## 9 Morning Consult all 54 27
## 10 Morning Consult R 84 7
## # ... with 321 more rows
# Distribution of the approval figures across all polls in the subset.
histogram(
  subset_COVID_APPROVAL_POLLS[["approve"]],
  main = "Histogram 1",
  xlab = "Trends for approval for the President Trump",
  col = "blue"
)

# Distribution of the disapproval figures across all polls in the subset.
histogram(
  subset_COVID_APPROVAL_POLLS[["disapprove"]],
  main = "Histogram 2",
  xlab = "Trends for disapproval for the President Trump",
  col = "red"
)

# Plot the `very` values for each poll subject.
# The data frame itself is passed as `data` (it has no `very.graph` column)
# and columns are mapped by bare name inside aes() rather than with `$`.
# The title describes the COVID-19 concern data actually being plotted.
ggplot(data = subset_COVID_CONCERN, mapping = aes(x = subject, y = very)) +
  geom_line() +
  labs(
    title = "Share of respondents very concerned about COVID-19, by subject",
    x = "subject",
    y = "Proportions"
  )
## Warning: Unknown or uninitialised column: `very.graph`.
