library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the Voter survey export. `weight_18_24_2018` is empty in the leading
# rows, so readr's type guess lands on logical and the numeric weights further
# down are turned into NA with parsing failures; pin that column to double.
# Every other column keeps readr's guessed type.
Vote <- read_csv(
  "~/Downloads/Voter.csv",
  col_types = cols(
    .default = col_guess(),
    weight_18_24_2018 = col_double()
  )
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## weight_18_24_2018 = col_logical(),
## izip_2019 = col_character(),
## housevote_other_2019 = col_character(),
## senatevote_other_2019 = col_character(),
## senatevote2_other_2019 = col_character(),
## SenCand1Name_2019 = col_character(),
## SenCand1Party_2019 = col_character(),
## SenCand2Name_2019 = col_character(),
## SenCand2Party_2019 = col_character(),
## SenCand3Name_2019 = col_character(),
## SenCand3Party_2019 = col_character(),
## SenCand1Name2_2019 = col_character(),
## SenCand1Party2_2019 = col_character(),
## SenCand2Name2_2019 = col_character(),
## SenCand2Party2_2019 = col_character(),
## SenCand3Name2_2019 = col_character(),
## SenCand3Party2_2019 = col_character(),
## governorvote_other_2019 = col_character(),
## GovCand1Name_2019 = col_character(),
## GovCand1Party_2019 = col_character()
## # ... with 108 more columns
## )
## See spec(...) for full column specifications.
## Warning: 800 parsing failures.
## row col expected actual file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter.csv'
## .... ................. .................. ................ .......................
## See problems(...) for more details.
# Preview the first six rows of the imported survey data.
head(Vote, n = 6)
## # A tibble: 6 x 1,282
## weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
## <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 0.358 0.438 0.503 NA NA
## 2 0.563 0.366 0.389 NA NA
## 3 0.552 0.550 0.684 NA NA
## 4 0.208 NA NA NA NA
## 5 0.334 0.346 0.322 NA NA
## 6 0.207 0.148 0.594 NA NA
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## # weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## # weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## # trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## # fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## # fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## # fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## # fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## # fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## # fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## # fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## # fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## # fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## # fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## # izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## # regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## # tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## # tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## # tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## # housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## # senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## # senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## # SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## # SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## # SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## # SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## # SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## # SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## # governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## # GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## # GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## # GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## # inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## # inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## # Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## # Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## # bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## # rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …
# Reduce the survey to the two variables used below, label the 2020 vote
# intention codes, blank out ratings above 100, and keep only respondents who
# picked one of the two major options.
Vote <- Vote %>%
  select(Women_2019, vote2020_2019) %>%
  mutate(
    # Codes other than 1-4 (and missing codes) fall through to NA.
    vote2020_2019 = case_when(
      vote2020_2019 == 1 ~ "Donald Trump",
      vote2020_2019 == 2 ~ "The Democratic candidate",
      vote2020_2019 == 3 ~ "I would not vote",
      vote2020_2019 == 4 ~ "Not sure"
    ),
    # Values above 100 are treated as missing.
    Women_2019 = if_else(Women_2019 > 100, NA_real_, Women_2019)
  ) %>%
  filter(vote2020_2019 %in% c("Donald Trump", "The Democratic candidate"))
# Mean rating of women within each vote-intention group, ignoring missing
# ratings.
Vote %>%
  group_by(vote2020_2019) %>%
  summarise(
    Womens_2019 = mean(Women_2019, na.rm = TRUE)
  )
## # A tibble: 2 x 2
## vote2020_2019 Womens_2019
## <chr> <dbl>
## 1 Donald Trump 73.0
## 2 The Democratic candidate 81.9
This shows that, on average, those who would vote for the Democratic candidate rate their feelings toward women higher (81.9) than those who would vote for Donald Trump (73.0).
# Histogram of the ratings of women, one panel per vote-intention group.
# `Vote` already contains only the two groups of interest, so no further
# filtering is needed here. `bins` is stated explicitly (30 is the ggplot2
# default) and missing ratings are dropped quietly via `na.rm`, so the plot is
# unchanged but no longer emits the stat_bin message and warning.
Vote %>%
  ggplot() +
  geom_histogram(
    aes(x = Women_2019, fill = vote2020_2019),
    bins = 30,
    na.rm = TRUE
  ) +
  facet_wrap(~vote2020_2019)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 305 rows containing non-finite values (stat_bin).
It seems that the ratings given by those who would vote for the Democratic candidate are concentrated at the higher end of the feelings-toward-women scale. By contrast, the ratings given by those who would vote for Donald Trump cluster more around the 50s.
# Build the sampling distribution of the mean rating for one group: draw
# `reps` samples of `n` ratings (without replacement) and return the sample
# means as a one-column data frame named `mean`.
# Missing ratings are removed before sampling so that every sample really
# contains `n` observations, rather than `n` minus however many NAs were drawn.
sampling_means <- function(ratings, n = 40, reps = 10000) {
  ratings <- ratings[!is.na(ratings)]
  data.frame(mean = replicate(reps, mean(sample(ratings, n))))
}

# Fix the RNG seed so the simulated distributions are reproducible.
set.seed(2019)

# Sampling distribution for respondents who would vote for Donald Trump.
trump_ratings <- filter(Vote, vote2020_2019 == "Donald Trump")$Women_2019
Donald_vote <- sampling_means(trump_ratings)

# Sampling distribution for respondents who would vote for the Democratic
# candidate.
dem_ratings <- filter(Vote, vote2020_2019 == "The Democratic candidate")$Women_2019
Dem_vote <- sampling_means(dem_ratings)
# Overlay the two sampling distributions of the mean (red = Donald Trump,
# blue = Democratic candidate). The fills are semi-transparent so the region
# where the distributions overlap stays visible instead of being hidden by
# whichever layer is drawn last; `bins` is stated explicitly (30 is the
# ggplot2 default) to avoid the stat_bin message.
ggplot(mapping = aes(x = mean)) +
  geom_histogram(data = Donald_vote, fill = "red", alpha = 0.6, bins = 30) +
  geom_histogram(data = Dem_vote, fill = "blue", alpha = 0.6, bins = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Both sampling distributions overlap a little between the 70s and the 80s. However, the distribution for those who would vote for the Democratic candidate leans toward the 80s, while the distribution for those who would vote for Donald Trump leans toward the low 70s. Both sampling distributions are approximately normally distributed.
# Pooled mean rating of women across both voter groups, ignoring missing
# ratings.
Vote %>%
  summarise(
    Women_2019 = mean(Women_2019, na.rm = TRUE)
  )
## # A tibble: 1 x 1
## Women_2019
## <dbl>
## 1 77.9
If a person’s vote choice (Donald Trump or the Democratic candidate) makes no difference to their feelings toward women, then we should find that the group-wise averages are both about 77.9, the overall mean.
# Two-sample t-test of the mean rating of women between the two voter groups.
t.test(Women_2019 ~ vote2020_2019, data = Vote)
##
## Welch Two Sample t-test
##
## data: Women_2019 by vote2020_2019
## t = -16.006, df = 4588.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.954328 -7.781945
## sample estimates:
## mean in group Donald Trump mean in group The Democratic candidate
## 73.03526 81.90339
# Strongly prefer fixed over scientific notation when numbers are printed
# from this point on.
options(scipen = 9999)
Since the p-value is less than .05, we reject the null hypothesis that there is no difference between the two group means; the difference in ratings is statistically significant.