homework

library(tidyverse)

## ── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Load the voter survey extract.
# With pure type guessing, readr inferred weight_18_24_2018 as logical and
# reported 800 parsing failures on its decimal weights, so declare that column
# as double. All other columns are still guessed.
Vote <- read_csv(
  "~/Downloads/Voter.csv",
  col_types = cols(weight_18_24_2018 = col_double())
)

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )

## See spec(...) for full column specifications.

## Warning: 800 parsing failures.
##  row               col           expected           actual                    file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter.csv'
## .... ................. .................. ................ .......................
## See problems(...) for more details.

# Preview the first six rows to sanity-check the import.
head(Vote, n = 6L)

## # A tibble: 6 x 1,282
##   weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
##         <dbl>       <dbl>            <dbl>            <dbl> <lgl>           
## 1       0.358       0.438            0.503               NA NA              
## 2       0.563       0.366            0.389               NA NA              
## 3       0.552       0.550            0.684               NA NA              
## 4       0.208      NA               NA                   NA NA              
## 5       0.334       0.346            0.322               NA NA              
## 6       0.207       0.148            0.594               NA NA              
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## #   weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## #   weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## #   trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## #   fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## #   fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## #   fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## #   fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## #   fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## #   fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## #   fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## #   fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## #   fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## #   fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## #   izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## #   regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## #   tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## #   tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## #   tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## #   housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## #   senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## #   senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## #   SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## #   SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## #   SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## #   SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## #   SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## #   SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## #   governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## #   GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## #   GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## #   GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## #   inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## #   inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## #   Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## #   Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## #   bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## #   rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …

Data prep

# Recode the numeric answer codes into labels, keep the three analysis
# columns, and retain only respondents who chose Donald Trump or the
# Democratic candidate.
Vote <- Vote %>%
  mutate(
    vote2020_2019 = case_when(
      vote2020_2019 == 1 ~ "Donald Trump",
      vote2020_2019 == 2 ~ "The Democratic candidate",
      vote2020_2019 == 3 ~ "I would not vote",
      vote2020_2019 == 4 ~ "Not sure",
      TRUE ~ NA_character_
    ),
    univhealthcov_2019 = case_when(
      univhealthcov_2019 == 1 ~ "Yes",
      univhealthcov_2019 == 2 ~ "No",
      TRUE ~ NA_character_
    ),
    # Values above 100 are set to missing; which() skips rows already NA.
    Women_2019 = replace(Women_2019, which(Women_2019 > 100), NA)
  ) %>%
  select(Women_2019, univhealthcov_2019, vote2020_2019) %>%
  filter(vote2020_2019 %in% c("Donald Trump", "The Democratic candidate"))

1. % of Respondents by Vote for Candidate

# Share of respondents per 2020 vote choice, rounded to two decimals.
round(prop.table(table(Vote$vote2020_2019)), digits = 2)

## 
##             Donald Trump The Democratic candidate 
##                     0.45                     0.55

Among respondents who chose one of the two options, a majority (55%) would vote for the Democratic candidate in 2020, while 45% would vote for Donald Trump.

1B. % of Respondents by Support for Universal Healthcare

# Share of respondents answering No / Yes on universal healthcare coverage,
# rounded to two decimals.
round(prop.table(table(Vote$univhealthcov_2019)), digits = 2)

## 
##   No  Yes 
## 0.46 0.54

Among these respondents, a majority (54%) support universal healthcare coverage, while 46% do not.

2. If these variables were completely unrelated to one another, what % of respondents would fit each variable combination?

Donald Trump: No = .45 × .46 = .21; Yes = .45 × .54 = .24

The Democratic candidate: No = .55 × .46 = .25; Yes = .55 × .54 = .30

3. Generate a crosstab to show how respondents actually distribute across variable combinations. How does this compare to the table you generated in step 2?

# Row-wise crosstab: within each 2020 vote choice, the proportion answering
# No / Yes on universal healthcare coverage (margin = 1 makes rows sum to 1).
prop.table(
  table(Vote$vote2020_2019, Vote$univhealthcov_2019),
  margin = 1
)

##                           
##                                    No        Yes
##   Donald Trump             0.90105008 0.09894992
##   The Democratic candidate 0.09377071 0.90622929

The actual observations show that 90% of those who would vote for Donald Trump in 2020 say no to universal health coverage. Meanwhile, 91% of those who would vote for the Democratic candidate in 2020 say yes to universal health coverage.

If there were no relationship between the two variables, both groups would have similar views on universal health coverage: each group would lean toward yes (54% yes, 46% no).

The actual observations depict a major difference between the two groups, whereas the table assuming independence shows percentages that are very similar across the categories.

4. Chi-squared test for independence

# Pearson chi-squared test of independence between healthcare support and
# 2020 vote choice.
chisq.test(
  x = Vote$univhealthcov_2019,
  y = Vote$vote2020_2019
)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Vote$univhealthcov_2019 and Vote$vote2020_2019
## X-squared = 3568, df = 1, p-value < 2.2e-16

# Strongly penalise scientific notation so later numbers print in fixed form.
options(scipen = 9999)

5. Interpret your findings

Since the p-value is less than .05, we reject the null hypothesis that 2020 vote choice and support for universal healthcare coverage are independent; the association in our actual observations is statistically significant.

6. Extract the Null Hypothesis Table from your chi-squared test output

# Expected counts under the null hypothesis of independence.
# Select the component by name rather than by position ([7]): the order of
# elements in the test result is an implementation detail, while the name
# "expected" is documented. Single-bracket indexing keeps the `$expected`
# header in the printed output.
chisq.test(Vote$vote2020_2019, Vote$univhealthcov_2019)["expected"]

## $expected
##                           Vote$univhealthcov_2019
## Vote$vote2020_2019               No      Yes
##   Donald Trump             1132.993 1343.007
##   The Democratic candidate 1381.007 1636.993

7. Extract the Observed Values Table from your chi-squared test output

# Observed counts used by the chi-squared test.
# Select the component by name rather than by position ([6]): the order of
# elements in the test result is an implementation detail, while the name
# "observed" is documented. Single-bracket indexing keeps the `$observed`
# header in the printed output.
chisq.test(Vote$vote2020_2019, Vote$univhealthcov_2019)["observed"]

## $observed
##                           Vote$univhealthcov_2019
## Vote$vote2020_2019           No  Yes
##   Donald Trump             2231  245
##   The Democratic candidate  283 2735

8. Compare & discuss the values observed between the two tables

If there were no relationship between the two variables, both groups would have similar views on universal health coverage, since both would lean toward yes. There are no major gaps in the Null Hypothesis table. The actual observations show that those who would vote for Donald Trump in 2020 are mostly against universal healthcare coverage. In addition, those who would vote for the Democratic candidate in 2020 are mostly in favor of universal healthcare coverage. The Observed Values table demonstrates a major difference between the two groups.