library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Import the survey export. `weight_18_24_2018` is empty in the rows readr
# inspects when guessing types, so it was guessed as logical and its numeric
# weights failed to parse; declaring it as double keeps those values.
# All other columns are still guessed.
Vote <- read_csv(
  "~/Downloads/Voter.csv",
  col_types = cols(
    .default = col_guess(),
    weight_18_24_2018 = col_double()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )
## See spec(...) for full column specifications.
## Warning: 800 parsing failures.
##  row               col           expected           actual                    file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter.csv'
## .... ................. .................. ................ .......................
## See problems(...) for more details.
# Preview the first rows of the imported data.
Vote %>%
  head()
## # A tibble: 6 x 1,282
##   weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
##         <dbl>       <dbl>            <dbl>            <dbl> <lgl>           
## 1       0.358       0.438            0.503               NA NA              
## 2       0.563       0.366            0.389               NA NA              
## 3       0.552       0.550            0.684               NA NA              
## 4       0.208      NA               NA                   NA NA              
## 5       0.334       0.346            0.322               NA NA              
## 6       0.207       0.148            0.594               NA NA              
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## #   weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## #   weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## #   trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## #   fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## #   fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## #   fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## #   fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## #   fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## #   fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## #   fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## #   fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## #   fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## #   fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## #   izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## #   regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## #   tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## #   tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## #   tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## #   housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## #   senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## #   senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## #   SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## #   SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## #   SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## #   SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## #   SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## #   SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## #   governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## #   GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## #   GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## #   GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## #   inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## #   inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## #   Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## #   Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## #   bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## #   rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …

How do those who would vote for either Donald Trump or the Democratic candidate in the 2020 election differ in their perspectives on women, abortion, and healthcare laws?

Data Preparation

# Give the abortion-related survey items readable names (new_name = old_name).
# Only named pairs are passed: bare column names add nothing to a rename and
# are rejected by current dplyr releases.
Vote <- Vote %>%
  rename(
    abortion_2019 = imiss_t_2019,
    abort_identity = abortidentity_baseline,
    Legalize_Abortion = abortview3_baseline
  )
# Recode the numeric survey codes into labelled values, then keep only the
# respondents who would vote for Donald Trump or the Democratic candidate.
# Any code that is not listed (including NA) becomes NA.
Vote <- Vote %>%
  mutate(
    # 2020 vote intention; kept as a character column.
    vote2020_2019 = case_when(
      vote2020_2019 == 1 ~ "Donald Trump",
      vote2020_2019 == 2 ~ "The Democratic candidate",
      vote2020_2019 == 3 ~ "I would not vote",
      vote2020_2019 == 4 ~ "Not sure"
    ),
    # Importance of abortion; levels run from least to most important,
    # i.e. codes 4, 3, 2, 1.
    abortion_2019 = factor(
      abortion_2019,
      levels = 4:1,
      labels = c(
        "Unimportant", "Not very Important",
        "Somewhat Important", "Very Important"
      )
    ),
    abort_identity = factor(
      abort_identity,
      levels = 1:2,
      labels = c("Pro-life", "Pro-choice")
    ),
    # The two health-care-law importance items stay character columns.
    healthcarelaw1_2019 = case_when(
      healthcarelaw1_2019 == "1" ~ "Very important",
      healthcarelaw1_2019 == "2" ~ "Somewhat important",
      healthcarelaw1_2019 == "3" ~ "Not too important",
      healthcarelaw1_2019 == "4" ~ "Not at all important"
    ),
    healthcarelaw2_2019 = case_when(
      healthcarelaw2_2019 == "1" ~ "Very important",
      healthcarelaw2_2019 == "2" ~ "Somewhat important",
      healthcarelaw2_2019 == "3" ~ "Not too important",
      healthcarelaw2_2019 == "4" ~ "Not at all important"
    ),
    Legalize_Abortion = factor(
      Legalize_Abortion,
      levels = 1:3,
      labels = c(
        "Legal in all cases",
        "Legal in some cases and illegal in others",
        "Illegal in all cases"
      )
    ),
    # Support for the insurance requirement; levels run from strongest
    # opposition to strongest support, i.e. codes 4, 3, 2, 1.
    favorhealth_2019 = factor(
      favorhealth_2019,
      levels = 4:1,
      labels = c(
        "Strongly oppose", "Somewhat oppose",
        "Somewhat favor", "Strongly favor"
      )
    ),
    # Ratings above 100 are set to missing.
    # NOTE(review): presumably these are non-response codes on a 0-100
    # scale; confirm against the codebook.
    Women_2019 = ifelse(Women_2019 > 100, NA, Women_2019)
  ) %>%
  filter(vote2020_2019 %in% c("Donald Trump", "The Democratic candidate"))

T-Test on Voter Candidates for 2020’s feelings on Women

Actual Hypothesis / Averages for both groups

# Mean rating of women within each vote-intention group (missing ratings
# are ignored).
Vote %>%
  group_by(vote2020_2019) %>%
  summarise(Women_2019 = mean(Women_2019, na.rm = TRUE))
## # A tibble: 2 x 2
##   vote2020_2019            Women_2019
##   <chr>                         <dbl>
## 1 Donald Trump                   73.0
## 2 The Democratic candidate       81.9
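This shows that, on average, those who would vote for Donald Trump rate their feelings toward women somewhat lower (73.0 out of 100) than those who would vote for the Democratic candidate (81.9 out of 100). Both averages are well above the midpoint of the scale, so both groups report positive feelings on average; the Democratic-candidate group is simply warmer.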

Population Distribution // Histogram

Below is a visualization of the previous table

# Distribution of the rating of women, one panel per vote-intention group.
# `Vote` already holds only these two groups, so no further filter is needed;
# `bins = 30` states the default explicitly instead of relying on it.
Vote %>%
  ggplot() +
  geom_histogram(aes(x = Women_2019, fill = vote2020_2019), bins = 30) +
  facet_wrap(~vote2020_2019)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 305 rows containing non-finite values (stat_bin).

The histograms show that those who would vote for the Democratic candidate are more likely than those who would vote for Donald Trump to give a score of 100 when asked about their feelings towards women. The histogram for Donald Trump voters shows that they also tend to give fairly positive ratings, since the distribution leans toward the higher end of the scale. However, a score of 50 out of 100 is very common among Donald Trump voters, and they give a score of 100 less often than voters for the Democratic candidate do.

Null Hypothesis

# Overall mean rating of women across both groups (missing ratings ignored).
Vote %>%
  summarise(Women_2019 = mean(Women_2019, na.rm = TRUE))
## # A tibble: 1 x 1
##   Women_2019
##        <dbl>
## 1       77.9
If a person’s vote choice (Donald Trump or the Democratic candidate) makes no difference in their feelings towards women, then we should find that the group-wise averages are about 77.9 for members of both groups.

Sampling Prep

# Build a sampling distribution of the mean rating for one group: draw `size`
# ratings without replacement, take their mean, and repeat `reps` times.
# Missing ratings are dropped before sampling so that every mean is based on
# exactly `size` ratings. Returns a data frame with a single column `mean`.
sample_group_means <- function(ratings, reps = 10000, size = 40) {
  ratings <- ratings[!is.na(ratings)]
  data.frame(mean = replicate(reps, mean(sample(ratings, size))))
}

# Fix the seed so the simulated distributions are the same on every run.
set.seed(2019)

trump_voters <- Vote %>%
  filter(vote2020_2019 == "Donald Trump")
Donald_vote <- sample_group_means(trump_voters$Women_2019)

dem_voters <- Vote %>%
  filter(vote2020_2019 == "The Democratic candidate")
Dem_vote <- sample_group_means(dem_voters$Women_2019)

Sampling Distributions

# Overlay the two sampling distributions (red = Donald Trump, blue = the
# Democratic candidate). Semi-transparent fills keep the region where the
# two histograms overlap visible; `bins = 30` states the default explicitly.
ggplot() +
  geom_histogram(
    data = Donald_vote, aes(x = mean),
    fill = "red", alpha = 0.5, bins = 30
  ) +
  geom_histogram(
    data = Dem_vote, aes(x = mean),
    fill = "blue", alpha = 0.5, bins = 30
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The two sampling distributions overlap a little between the 70s and 80s. However, we see that the sample means for those who would vote for the Democratic candidate fall mostly in the 80s, while those for Donald Trump voters fall mostly in the low 70s. Both sampling distributions appear approximately normally distributed.

T-Test

# Two-sample t-test of the rating of women by vote-intention group.
t.test(Women_2019 ~ vote2020_2019, data = Vote)
## 
##  Welch Two Sample t-test
## 
## data:  Women_2019 by vote2020_2019
## t = -16.006, df = 4588.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.954328 -7.781945
## sample estimates:
##             mean in group Donald Trump mean in group The Democratic candidate 
##                               73.03526                               81.90339
# Prefer fixed over scientific notation for numbers printed from here on.
options(scipen = 9999)
The results show that there is a statistically significant difference between those who would vote for Donald Trump and those who would vote for the Democratic candidate in 2020 in their feelings towards women. This is indicated by the p-value, which is less than 0.05. We therefore reject the null hypothesis that the two groups have the same mean rating.

Chi-Squared Test on the Voter Candidates for 2020’s take on the Importance of Abortion

% of Respondents by Vote for Candidate

# Share of respondents in each vote-intention group, rounded to 2 decimals.
vote_counts <- table(Vote$vote2020_2019)
round(prop.table(vote_counts), 2)
## 
##             Donald Trump The Democratic candidate 
##                     0.45                     0.55
We see that 55% of these respondents would vote for the Democratic candidate, whereas 45% would vote for Donald Trump.

% of Respondents by Importance of Abortion

# Share of respondents giving each abortion-importance answer, rounded to
# 2 decimals.
abortion_counts <- table(Vote$abortion_2019)
round(prop.table(abortion_counts), 2)
## 
##        Unimportant Not very Important Somewhat Important     Very Important 
##               0.11               0.18               0.31               0.41
We see that the largest share of voters (41%) feel that abortion is Very Important.

Null Hypothesis

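If the two variables are unrelated, the expected share of all respondents in each cell is the product of the two marginal shares:

Donald Trump: .45 * .11 = .05 Unimportant; .45 * .18 = .08 Not very Important; .45 * .31 = .14 Somewhat Important; .45 * .41 = .18 Very Important

The Democratic candidate: .55 * .11 = .06 Unimportant; .55 * .18 = .10 Not very Important; .55 * .31 = .17 Somewhat Important; .55 * .41 = .23 Very Important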

Cross-tabulation of actual observations

# Row proportions: within each vote-intention group, the share giving each
# abortion-importance answer (each row sums to 1).
vote_by_abortion <- table(Vote$vote2020_2019, Vote$abortion_2019)
prop.table(vote_by_abortion, margin = 1)
##                           
##                            Unimportant Not very Important Somewhat Important
##   Donald Trump              0.11902028         0.18522771         0.27745886
##   The Democratic candidate  0.09820322         0.17317224         0.33178439
##                           
##                            Very Important
##   Donald Trump                 0.41829315
##   The Democratic candidate     0.39684015
The actual observations reveal that voters for both candidates most often say that the topic of abortion is “Very Important”. Among voters for Donald Trump, “Very Important” is the most common answer at 42%. Among voters for the Democratic candidate, “Very Important” is also the most common answer at 40%. This reveals that both groups have strong opinions on the topic of abortion.
If there is no relationship between the two variables, we would still see that both groups most often view the topic of abortion as “Very Important”, with voters for Donald Trump who give that answer making up 18% of all respondents and voters for the Democratic candidate who give that answer making up 23%.
Both sets of figures show that the percentages in each category are closely similar for the two groups. This demonstrates that abortion is a very sensitive topic for both groups.

Chi-Squared Test

# Chi-squared test of abortion importance against vote intention.
chisq.test(x = Vote$abortion_2019, y = Vote$vote2020_2019)
## 
##  Pearson's Chi-squared test
## 
## data:  Vote$abortion_2019 and Vote$vote2020_2019
## X-squared = 22.491, df = 3, p-value = 0.00005155
# Keep fixed (non-scientific) notation for printed numbers; the same option
# is also set after the t-test earlier in the report.
options(scipen = 9999)
Since the p-value is less than .05, we reject the null hypothesis that there is no relationship between vote choice and the importance placed on abortion; the differences in our actual observations are statistically significant.

Extract the Expected and Observed Values Tables from my chi-squared test output

# Expected counts from the chi-squared test, selected by component name
# rather than by its position in the result list.
chisq.test(Vote$vote2020_2019, Vote$abortion_2019)["expected"]
## $expected
##                           Vote$abortion_2019
## Vote$vote2020_2019         Unimportant Not very Important Somewhat Important
##   Donald Trump                280.9389           466.5912           803.4494
##   The Democratic candidate    347.0611           576.4088           992.5506
##                           Vote$abortion_2019
## Vote$vote2020_2019         Very Important
##   Donald Trump                   1062.021
##   The Democratic candidate       1311.979
# Observed counts from the chi-squared test, selected by component name
# rather than by its position in the result list.
chisq.test(Vote$vote2020_2019, Vote$abortion_2019)["observed"]
## $observed
##                           Vote$abortion_2019
## Vote$vote2020_2019         Unimportant Not very Important Somewhat Important
##   Donald Trump                     311                484                725
##   The Democratic candidate         317                559               1071
##                           Vote$abortion_2019
## Vote$vote2020_2019         Very Important
##   Donald Trump                       1093
##   The Democratic candidate           1281
The Expected table and the Observed table reflect that both groups have strong opinions on the topic of abortion being Very Important, since they both have the highest number for “Very Important” in both tables. There is no major difference.

Average Feelings on Healthcare Laws

As you may know, the Affordable Care Act required nearly all Americans to have health insurance, or else pay a fine. Congress passed a law repealing this requirement starting in January 2019. Do you favor or oppose requiring all Americans have health insurance?

# Row proportions: for each answer, the share of its respondents in each
# vote-intention group (each row sums to 1), rounded to 2 decimals.
favorhealth_by_vote <- table(Vote$favorhealth_2019, Vote$vote2020_2019)
round(prop.table(favorhealth_by_vote, margin = 1), 2)
##                  
##                   Donald Trump The Democratic candidate
##   Strongly oppose         0.87                     0.13
##   Somewhat oppose         0.58                     0.42
##   Somewhat favor          0.26                     0.74
##   Strongly favor          0.13                     0.87
From the row percentages, we see a major difference between the two groups: 87% of the respondents who “Strongly oppose” the requirement would vote for Donald Trump, while 87% of those who “Strongly favor” it would vote for the Democratic candidate. (Each row sums to 100%, so these figures describe the make-up of each answer category rather than the share of each candidate’s voters who gave that answer.)

Data visualization of previous table

Below is a stacked bar chart, visualizing the previous table.

# Stacked bars: for each answer, the share of its respondents in each
# vote-intention group. The grouping used for the percentages is stated
# explicitly (and released afterwards) instead of relying on the grouping
# that summarize() leaves behind.
Vote %>%
  filter(!is.na(vote2020_2019), !is.na(favorhealth_2019)) %>%
  count(favorhealth_2019, vote2020_2019) %>%
  group_by(favorhealth_2019) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = favorhealth_2019, y = percent, fill = vote2020_2019)) +
  theme_minimal()

This chart shows that the share of Donald Trump voters is largest among those who “Strongly oppose” the requirement and shrinks steadily across the categories towards “Strongly favor”. The share of voters for the Democratic candidate is largest among those who “Strongly favor” it and shrinks across the categories towards “Strongly oppose”. The two groups move in opposite directions, as shown in the chart.
This suggests that opposition to the law requiring all Americans to have health insurance comes mostly from Donald Trump voters, whereas support for it comes mostly from voters for the Democratic candidate.

The 2010 health care law provided protections for people with pre-existing health conditions. How important is it to you that the following provisions remain the law?

# Row proportions: for each answer, the share of its respondents in each
# vote-intention group (each row sums to 1), rounded to 2 decimals.
healthcarelaw1_by_vote <- table(Vote$healthcarelaw1_2019, Vote$vote2020_2019)
round(prop.table(healthcarelaw1_by_vote, margin = 1), 2)
##                       
##                        Donald Trump The Democratic candidate
##   Not at all important         0.92                     0.08
##   Not too important            0.86                     0.14
##   Somewhat important           0.72                     0.28
##   Very important               0.30                     0.70
From the row percentages, we see a major difference between the two groups: 92% of the respondents who answered “Not at all important” would vote for Donald Trump, while 70% of those who answered “Very important” would vote for the Democratic candidate. (Each row sums to 100%, so these figures describe the make-up of each answer category rather than the share of each candidate’s voters who gave that answer.)

Data visualization of previous table

# Stacked bars: for each answer, the share of its respondents in each
# vote-intention group. The grouping used for the percentages is stated
# explicitly (and released afterwards) instead of relying on the grouping
# that summarize() leaves behind.
Vote %>%
  filter(!is.na(vote2020_2019), !is.na(healthcarelaw1_2019)) %>%
  count(healthcarelaw1_2019, vote2020_2019) %>%
  group_by(healthcarelaw1_2019) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = healthcarelaw1_2019, y = percent, fill = vote2020_2019)) +
  theme_minimal()

This chart shows that the share of Donald Trump voters is largest among those who answered “Not at all important” and shrinks across the categories on the way to “Very important”. The share of voters for the Democratic candidate is largest among those who answered “Very important” and shrinks across the categories on the way to “Not at all important”. The two groups move in opposite directions, as shown in the chart.
This suggests that respondents who see the protections for people with pre-existing health conditions as unimportant are predominantly Donald Trump voters, whereas those who see them as very important are mostly voters for the Democratic candidate.

The 2010 health care law provided protections for people with pre-existing health conditions. How important is it to you that the following provisions remain the law? The provision that prohibits health insurance companies from denying coverage because of a person’s medical history.

# Row proportions: for each answer, the share of its respondents in each
# vote-intention group (each row sums to 1), rounded to 2 decimals.
healthcarelaw2_by_vote <- table(Vote$healthcarelaw2_2019, Vote$vote2020_2019)
round(prop.table(healthcarelaw2_by_vote, margin = 1), 2)
##                       
##                        Donald Trump The Democratic candidate
##   Not at all important         0.88                     0.12
##   Not too important            0.85                     0.15
##   Somewhat important           0.78                     0.22
##   Very important               0.34                     0.66
From the row percentages, we see a major difference between the two groups: 88% of the respondents who answered “Not at all important” would vote for Donald Trump, while 66% of those who answered “Very important” would vote for the Democratic candidate. (Each row sums to 100%, so these figures describe the make-up of each answer category rather than the share of each candidate’s voters who gave that answer.)

Data Visualization of previous table

# Stacked bars: for each answer, the share of its respondents in each
# vote-intention group. The grouping used for the percentages is stated
# explicitly (and released afterwards) instead of relying on the grouping
# that summarize() leaves behind.
Vote %>%
  filter(!is.na(vote2020_2019), !is.na(healthcarelaw2_2019)) %>%
  count(healthcarelaw2_2019, vote2020_2019) %>%
  group_by(healthcarelaw2_2019) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = healthcarelaw2_2019, y = percent, fill = vote2020_2019)) +
  theme_minimal()

This chart shows that the share of Donald Trump voters is largest among those who answered “Not at all important” and shrinks across the categories on the way to “Very important”. The share of voters for the Democratic candidate is largest among those who answered “Very important” and shrinks across the categories on the way to “Not at all important”. The two groups move in opposite directions, as shown in the chart.
This suggests that respondents who see the provision that prohibits health insurance companies from denying coverage because of a person’s medical history as unimportant are predominantly Donald Trump voters, whereas those who see that provision as very important are mostly voters for the Democratic candidate.

Summary

From my analysis, we see a major contrast between the two groups, especially with those who would vote Donald Trump having smaller ratings on their feelings toward Women and identify as “Pro-life”. On the other hand, those who would vote for the Democratic candidate have higher ratings on their feelings towards Women and identify as “Pro-choice”. However, both groups have strong opinions on the importance of Abortion.

My analysis of both groups’ views on healthcare laws exhibits another contrast between the two groups. We see that those who would vote for Donald Trump are more likely to view the requirement of all Americans to have health insurance, the protection for people with pre-existing health conditions, and the provision that prohibits health insurance companies from denying coverage because of a person’s medical history as irrelevant and unimportant. Voters for the Democratic candidate are more likely to view the requirement of all Americans to have health insurance, the protection for people with pre-existing health conditions, and the provision that prohibits health insurance companies from denying coverage because of a person’s medical history as relevant and important.

Overall, my analyses on this report reveal characteristic difference between those who would vote for Donald Trump and the Democratic candidate. It seems that voters of Donald Trump hold religious, patriarchal views and focus more on the individual instead of the country. It seems that voters of the Democratic candidate hold progressive, matriarchal views and focus more on empathy of the country.