Investigating Continuous Outcomes Assignment

I will be comparing Bernie Sanders voters and Donald Trump voters, as identified by their response to the Sanders_Trump_2016 variable. The two groups are compared on their outcomes for the ft_gays_2016 variable (feeling towards gay people).

Preliminary Steps

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(ggplot2)
library(tidyr)
library(knitr)

# Load the survey data. readr guesses column types from the leading rows of the
# file; weight_18_24_2018 is empty in those rows, so it was guessed as logical
# and its numeric weights (800 rows) failed to parse. Declaring the column as
# double keeps those values; every other column is still guessed as before.
Voter_Data_2019 <- read_csv(
  "~/Downloads/Voter Data 2019.csv",
  col_types = cols(weight_18_24_2018 = col_double())
)

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )

## See spec(...) for full column specifications.

## Warning: 800 parsing failures.
##  row               col           expected           actual                              file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ .................................
## See problems(...) for more details.

 # Preview the first six rows of the raw survey data.
 Voter_Data_2019 %>% head()

## # A tibble: 6 x 1,282
##   weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
##         <dbl>       <dbl>            <dbl>            <dbl> <lgl>           
## 1       0.358       0.438            0.503               NA NA              
## 2       0.563       0.366            0.389               NA NA              
## 3       0.552       0.550            0.684               NA NA              
## 4       0.208      NA               NA                   NA NA              
## 5       0.334       0.346            0.322               NA NA              
## 6       0.207       0.148            0.594               NA NA              
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## #   weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## #   weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## #   trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## #   fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## #   fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## #   fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## #   fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## #   fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## #   fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## #   fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## #   fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## #   fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## #   fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## #   izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## #   regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## #   tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## #   tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## #   tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## #   housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## #   senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## #   senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## #   SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## #   SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## #   SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## #   SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## #   SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## #   SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## #   governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## #   GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## #   GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## #   GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## #   inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## #   inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## #   Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## #   Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## #   bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## #   rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …

# Build the analysis data set: keep only the two columns of interest and only
# respondents coded 1 or 2 on Sanders_Trump_2016, then label the groups and
# blank out feeling ratings above 100.
Voter <- Voter_Data_2019 %>%
  select(ft_gays_2016, Sanders_Trump_2016) %>%
  filter(Sanders_Trump_2016 %in% c(1, 2)) %>%
  mutate(
    Sanders_Trump_2016 = case_when(
      Sanders_Trump_2016 == 1 ~ "Bernie Sanders Voters",
      Sanders_Trump_2016 == 2 ~ "Donald Trump Voters"
    ),
    # Values above 100 are treated as missing (presumably non-response codes;
    # confirm against the codebook).
    ft_gays_2016 = replace(ft_gays_2016, which(ft_gays_2016 > 100), NA)
  )

Tasks

1. How do they differ in their average values? (Calculate group-wise averages)

# Mean ft_gays_2016 rating for each voter group, ignoring missing ratings.
summarise(
  group_by(Voter, Sanders_Trump_2016),
  AVG = mean(ft_gays_2016, na.rm = TRUE)
)

## # A tibble: 2 x 2
##   Sanders_Trump_2016      AVG
##   <chr>                 <dbl>
## 1 Bernie Sanders Voters  75.4
## 2 Donald Trump Voters    48.6

2. Plot their Population distributions in a histogram, investigating how they differ in their distribution of responses.

Bernie Voter Data

# Rows of Voter belonging to the Bernie Sanders group.
bernie_data <- filter(Voter, Sanders_Trump_2016 == "Bernie Sanders Voters")

# Draw three samples of 40 Sanders voters and report each sample's mean rating.
# Missing ratings are dropped BEFORE sampling so every mean is based on exactly
# 40 responses; sampling first and discarding NAs afterwards leaves each sample
# with a different, smaller size.
bernie_ratings <- bernie_data$ft_gays_2016[!is.na(bernie_data$ft_gays_2016)]
replicate(3, mean(sample(bernie_ratings, 40)))

## [1] 78.125 82.825 73.925

# Task 2 asks for the POPULATION distribution: a histogram of the individual
# ratings given by Sanders voters. (The sampling distribution of the mean is
# generated and plotted under task 3.) Missing ratings are removed explicitly,
# and the bin width is set rather than relying on the 30-bin default.
bernie_data %>%
  filter(!is.na(ft_gays_2016)) %>%
  ggplot() +
  geom_histogram(aes(x = ft_gays_2016), binwidth = 5, fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Trump Voter Data

# Rows of Voter belonging to the Donald Trump group.
trump_data <- filter(Voter, Sanders_Trump_2016 == "Donald Trump Voters")

# Draw three samples of 40 Trump voters and report each sample's mean rating.
# Missing ratings are dropped BEFORE sampling so every mean is based on exactly
# 40 responses; sampling first and discarding NAs afterwards leaves each sample
# with a different, smaller size.
trump_ratings <- trump_data$ft_gays_2016[!is.na(trump_data$ft_gays_2016)]
replicate(3, mean(sample(trump_ratings, 40)))

## [1] 44.05405 52.38889 55.65000

# Task 2 asks for the POPULATION distribution: a histogram of the individual
# ratings given by Trump voters. (The sampling distribution of the mean is
# generated and plotted under task 3.) Missing ratings are removed explicitly,
# and the bin width is set rather than relying on the 30-bin default.
trump_data %>%
  filter(!is.na(ft_gays_2016)) %>%
  ggplot() +
  geom_histogram(aes(x = ft_gays_2016), binwidth = 5, fill = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Bernie Sanders Voter Data

# Subset of Voter containing only the Bernie Sanders group.
bernie_data <- Voter %>%
  filter(Sanders_Trump_2016 %in% "Bernie Sanders Voters")

Bernie Sanders Voter Sampling Distribution

# Sampling distribution of the mean for Sanders voters: 10,000 sample means,
# each from a random sample of 40 ratings, stored in a one-column data frame
# named `mean`. Missing ratings are removed BEFORE sampling so that every
# sample mean rests on exactly 40 responses rather than on however many
# non-missing values happened to be drawn.
bernie_ratings <- bernie_data$ft_gays_2016[!is.na(bernie_data$ft_gays_2016)]
bernie_samp_distro <- data.frame(
  mean = replicate(10000, mean(sample(bernie_ratings, 40)))
)

Donald Trump Voter Data

# Subset of Voter containing only the Donald Trump group.
trump_data <- Voter %>%
  filter(Sanders_Trump_2016 %in% "Donald Trump Voters")

Donald Trump Voter Sampling Distribution

# Sampling distribution of the mean for Trump voters: 10,000 sample means,
# each from a random sample of 40 ratings, stored in a one-column data frame
# named `mean`. Missing ratings are removed BEFORE sampling so that every
# sample mean rests on exactly 40 responses rather than on however many
# non-missing values happened to be drawn.
trump_ratings <- trump_data$ft_gays_2016[!is.na(trump_data$ft_gays_2016)]
trump_samp_distro <- data.frame(
  mean = replicate(10000, mean(sample(trump_ratings, 40)))
)

3. Generate & plot their sampling distributions in a histogram

Histogram of Bernie Voters

# Histogram of the simulated sample means for Sanders voters.
ggplot(bernie_samp_distro, aes(x = mean)) +
  geom_histogram(fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Histogram of Trump Voters

# Histogram of the simulated sample means for Trump voters.
ggplot(trump_samp_distro, aes(x = mean)) +
  geom_histogram(fill = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Sampling Distribution

# Overlay both sampling distributions in one plot. The two data frames are
# stacked with a `group` label so that fill colour is mapped to the group and a
# legend is drawn; a shared bin width makes the bars directly comparable, and
# partial transparency with position = "identity" keeps one group from hiding
# the other wherever the distributions overlap.
bind_rows(
  "Bernie Sanders Voters" = bernie_samp_distro,
  "Donald Trump Voters" = trump_samp_distro,
  .id = "group"
) %>%
  ggplot(aes(x = mean, fill = group)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.6) +
  scale_fill_manual(
    values = c("Bernie Sanders Voters" = "blue", "Donald Trump Voters" = "red")
  ) +
  labs(
    x = "Mean ft_gays_2016 rating per sample of 40",
    y = "Number of samples",
    fill = NULL
  )

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

4. Run a t.test to compare means between samples.

# Welch two-sample t-test of mean ft_gays_2016 between the two voter groups.
# The arguments below are t.test()'s defaults, spelled out for the reader.
t.test(
  ft_gays_2016 ~ Sanders_Trump_2016,
  data = Voter,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)

## 
##  Welch Two Sample t-test
## 
## data:  ft_gays_2016 by Sanders_Trump_2016
## t = 41.684, df = 6464.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  25.48147 27.99645
## sample estimates:
## mean in group Bernie Sanders Voters   mean in group Donald Trump Voters 
##                            75.35831                            48.61935

5. Write 2-3 sentences interpreting your findings.

I compared Bernie Sanders voters and Donald Trump voters, as identified by the Sanders_Trump_2016 variable, on their responses to the ft_gays_2016 variable (feeling towards gay people). Bernie Sanders voters feel considerably more favorable towards gay people than Donald Trump voters: the mean rating is about 75.36 for Sanders voters versus about 48.62 for Trump voters, a difference of roughly 27 points. The Welch two-sample t-test (t = 41.68, p < 2.2e-16, 95% confidence interval 25.48 to 28.00) shows that this difference is statistically significant.