library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the voter survey data.
# weight_18_24_2018 is declared as a double explicitly: readr guesses column
# types from the leading rows only, and this column is empty there, so it was
# being typed as logical and its numeric weights (e.g. 1.41) failed to parse
# and were turned into NA. All other columns are still guessed.
Vote <- read_csv(
  "~/Downloads/Voter.csv",
  col_types = cols(weight_18_24_2018 = col_double())
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## weight_18_24_2018 = col_logical(),
## izip_2019 = col_character(),
## housevote_other_2019 = col_character(),
## senatevote_other_2019 = col_character(),
## senatevote2_other_2019 = col_character(),
## SenCand1Name_2019 = col_character(),
## SenCand1Party_2019 = col_character(),
## SenCand2Name_2019 = col_character(),
## SenCand2Party_2019 = col_character(),
## SenCand3Name_2019 = col_character(),
## SenCand3Party_2019 = col_character(),
## SenCand1Name2_2019 = col_character(),
## SenCand1Party2_2019 = col_character(),
## SenCand2Name2_2019 = col_character(),
## SenCand2Party2_2019 = col_character(),
## SenCand3Name2_2019 = col_character(),
## SenCand3Party2_2019 = col_character(),
## governorvote_other_2019 = col_character(),
## GovCand1Name_2019 = col_character(),
## GovCand1Party_2019 = col_character()
## # ... with 108 more columns
## )
## See spec(...) for full column specifications.
## Warning: 800 parsing failures.
## row col expected actual file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter.csv'
## .... ................. .................. ................ .......................
## See problems(...) for more details.
# Preview the first six rows of the imported survey data.
head(Vote, n = 6L)
## # A tibble: 6 x 1,282
## weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
## <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 0.358 0.438 0.503 NA NA
## 2 0.563 0.366 0.389 NA NA
## 3 0.552 0.550 0.684 NA NA
## 4 0.208 NA NA NA NA
## 5 0.334 0.346 0.322 NA NA
## 6 0.207 0.148 0.594 NA NA
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## # weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## # weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## # trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## # fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## # fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## # fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## # fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## # fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## # fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## # fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## # fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## # fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## # fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## # izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## # regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## # tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## # tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## # tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## # housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## # senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## # senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## # SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## # SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## # SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## # SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## # SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## # SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## # governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## # GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## # GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## # GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## # inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## # inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## # Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## # Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## # bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## # rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …
# Recode the numeric survey codes into text labels (any other code becomes
# NA), set Women_2019 values above 100 to NA, keep the three analysis
# columns, and retain only respondents who picked one of the two candidates.
Vote <- Vote %>%
  mutate(
    vote2020_2019 = case_when(
      vote2020_2019 == 1 ~ "Donald Trump",
      vote2020_2019 == 2 ~ "The Democratic candidate",
      vote2020_2019 == 3 ~ "I would not vote",
      vote2020_2019 == 4 ~ "Not sure"
    ),
    univhealthcov_2019 = case_when(
      univhealthcov_2019 == 1 ~ "Yes",
      univhealthcov_2019 == 2 ~ "No"
    ),
    # which() skips NA comparisons, so missing scores stay missing.
    Women_2019 = replace(Women_2019, which(Women_2019 > 100), NA)
  ) %>%
  select(Women_2019, univhealthcov_2019, vote2020_2019) %>%
  filter(vote2020_2019 %in% c("Donald Trump", "The Democratic candidate"))
# Share of respondents choosing each candidate, rounded to two decimals.
round(prop.table(table(Vote$vote2020_2019)), 2)
##
## Donald Trump The Democratic candidate
## 0.45 0.55
Among respondents who chose one of the two options, a majority (55%) would vote for the Democratic candidate in 2020, while 45% would vote for Donald Trump.
# Share of respondents for and against universal health coverage,
# rounded to two decimals.
round(prop.table(table(Vote$univhealthcov_2019)), 2)
##
## No Yes
## 0.46 0.54
We see that a majority of these respondents (54%) support Universal Healthcare Coverage, while 46% oppose it.
# Cross-tabulate vote choice (rows) by healthcare view (columns) and
# convert each row to proportions, so every row sums to 1.
prop.table(
  table(Vote$vote2020_2019, Vote$univhealthcov_2019),
  margin = 1
)
##
## No Yes
## Donald Trump 0.90105008 0.09894992
## The Democratic candidate 0.09377071 0.90622929
The observed data show that about 90% of those who would vote for Donald Trump in 2020 say no to Universal Health Coverage. Meanwhile, about 91% of those who would vote for the Democratic candidate in 2020 say yes to Universal Health Coverage.
If there were no relationship between the two variables, both groups would have similar views on Universal Health Coverage, with both leaning toward yes (about 54%, the overall share).
The actual observations depict a major difference between the two groups. Under independence, by contrast, the percentages in each category would be nearly the same for both groups.
# Pearson chi-squared test of independence between healthcare view and
# vote choice.
chisq.test(
  x = Vote$univhealthcov_2019,
  y = Vote$vote2020_2019
)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Vote$univhealthcov_2019 and Vote$vote2020_2019
## X-squared = 3568, df = 1, p-value < 2.2e-16
# Strongly prefer fixed over scientific notation when printing numbers
# from here on.
options(scipen = 9999)
Since the p-value is less than .05, we reject the null hypothesis that there is no relationship between vote choice and support for Universal Healthcare Coverage; the association in our actual observations is statistically significant.
# Expected cell counts under the null hypothesis of independence,
# selected by name rather than by position in the result list.
chisq.test(
  x = Vote$vote2020_2019,
  y = Vote$univhealthcov_2019
)["expected"]
## $expected
## Vote$univhealthcov_2019
## Vote$vote2020_2019 No Yes
## Donald Trump 1132.993 1343.007
## The Democratic candidate 1381.007 1636.993
# Observed cell counts used by the test, selected by name rather than by
# position in the result list.
chisq.test(
  x = Vote$vote2020_2019,
  y = Vote$univhealthcov_2019
)["observed"]
## $observed
## Vote$univhealthcov_2019
## Vote$vote2020_2019 No Yes
## Donald Trump 2231 245
## The Democratic candidate 283 2735
If there were no relationship between the two variables, both groups would have similar views on Universal Health Coverage, with both leaning toward yes. There are no major gaps between the groups in the expected-counts (null hypothesis) table. The actual observations show that those who would vote for Donald Trump in 2020 are mostly against Universal Healthcare Coverage. In addition, those who would vote for the Democratic candidate in 2020 are mostly in favor of Universal Healthcare Coverage. The observed-values table demonstrates a major difference between the two groups.