Final paper

IMPORT DATA

library(readr)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the 2019 voter survey export into a tibble.
# readr infers column types from the leading rows only. weight_18_24_2018 is
# empty there, so it was inferred as logical and its numeric weights failed to
# parse (they were silently turned into NA). Declaring that column as double
# keeps the weights; every other column is still guessed as before.
data <- read_csv(
  "C:/Users/JaminS/Downloads/Voter Data 2019.csv",
  col_types = cols(
    .default = col_guess(),
    weight_18_24_2018 = col_double()
  )
)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )
## i Use `spec()` for the full column specifications.

## Warning: 800 parsing failures.
##  row               col           expected           actual                                            file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ ...............................................
## See problems(...) for more details.

# Preview the first six rows of the imported survey data.
head(data, n = 6L)

## # A tibble: 6 x 1,282
##   weight_2016 weight_2017 weight_panel_20~ weight_latino_2~ weight_18_24_20~
##         <dbl>       <dbl>            <dbl>            <dbl> <lgl>           
## 1       0.358       0.438            0.503               NA NA              
## 2       0.563       0.366            0.389               NA NA              
## 3       0.552       0.550            0.684               NA NA              
## 4       0.208      NA               NA                   NA NA              
## 5       0.334       0.346            0.322               NA NA              
## 6       0.207       0.148            0.594               NA NA              
## # ... with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## #   weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## #   weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## #   trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## #   fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## #   fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## #   fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## #   fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## #   fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## #   fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## #   fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## #   fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## #   fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## #   fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## #   izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## #   regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## #   tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## #   tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## #   tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## #   housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## #   senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## #   senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## #   SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## #   SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## #   SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## #   SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## #   SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## #   SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## #   governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## #   GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## #   GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## #   GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## #   inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## #   inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## #   Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## #   Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## #   bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## #   rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, ...

# Show only the four survey columns this paper works with.
select(
  data,
  fav_sanders_2019,
  fav_biden_2019,
  tsmart_P2018_vote_type_2019,
  pid3_2019
)

## # A tibble: 9,548 x 4
##    fav_sanders_2019 fav_biden_2019 tsmart_P2018_vote_type_2019 pid3_2019
##               <dbl>          <dbl>                       <dbl>     <dbl>
##  1                1              1                           3         1
##  2               NA             NA                          99        NA
##  3                2              1                           1         3
##  4                4              1                          99         3
##  5                3              2                          99         2
##  6                3              1                           3         1
##  7                3              1                           5         1
##  8               NA             NA                          99        NA
##  9               NA             NA                          99        NA
## 10                2              3                           5         1
## # ... with 9,538 more rows

Introduction:

This research examines how Democrats and Republicans in 2019 viewed two candidates, Biden and Sanders. My continuous variable is tsmart_P2018_vote_type_2019. fav_sanders_2019 and fav_biden_2019 are the categorical variables, and pid3_2019 identifies the party group each respondent belongs to.

# Build the analysis table: turn the numeric survey codes into text labels.
#
# * case_when() replaces the nested ifelse() ladders. Any code that is not
#   listed (including NA) falls through to NA, exactly as before.
# * Favorability codes 4 and 5 both map to "very unfavorable", as in the
#   original recode.
# * The recoded vote type (`tsmart`) is now kept in the result; previously it
#   was computed and then dropped by the final select(). The raw numeric
#   column is still kept alongside it.
# * Label spellings are left unchanged so category names stay stable.
#   NOTE(review): "Repubilcan" is misspelled and the Sanders/Biden labels
#   differ in capitalisation ("Don't Know" vs "Don't know") - consider
#   correcting them together with the rendered output.
DATA <- data %>%
  select(
    fav_sanders_2019,
    fav_biden_2019,
    tsmart_P2018_vote_type_2019,
    pid3_2019
  ) %>%
  mutate(
    Sanderfavorite = case_when(
      fav_sanders_2019 == 1 ~ "very favorable",
      fav_sanders_2019 == 2 ~ "Somewhat favorable",
      fav_sanders_2019 == 3 ~ "Somewhat unfavorable",
      fav_sanders_2019 %in% c(4, 5) ~ "very unfavorable",
      fav_sanders_2019 == 8 ~ "Don't Know",
      fav_sanders_2019 == 98 ~ "Skipped",
      TRUE ~ NA_character_
    ),
    Bidenfavorite = case_when(
      fav_biden_2019 == 1 ~ "very favorable",
      fav_biden_2019 == 2 ~ "somewhat favorable",
      fav_biden_2019 == 3 ~ "somewhat unfavorable",
      fav_biden_2019 %in% c(4, 5) ~ "very unfavorable",
      fav_biden_2019 == 8 ~ "Don't know",
      fav_biden_2019 == 98 ~ "Skipped",
      TRUE ~ NA_character_
    ),
    tsmart = case_when(
      tsmart_P2018_vote_type_2019 == 1 ~ "poll",
      tsmart_P2018_vote_type_2019 == 2 ~ "Early",
      tsmart_P2018_vote_type_2019 == 3 ~ "Absentee",
      tsmart_P2018_vote_type_2019 == 4 ~ "Mail",
      tsmart_P2018_vote_type_2019 == 5 ~ "Yes",
      TRUE ~ NA_character_
    ),
    # Only codes 1 and 2 are labelled; every other party code becomes NA.
    Twoparties = case_when(
      pid3_2019 == 1 ~ "Democrat",
      pid3_2019 == 2 ~ "Repubilcan",
      TRUE ~ NA_character_
    )
  ) %>%
  select(
    Sanderfavorite,
    Bidenfavorite,
    tsmart_P2018_vote_type_2019,
    tsmart,
    Twoparties
  )

DATA TABLE: Sanders vs TwoParties

# Cross-tabulate Sanders favorability by party and convert the counts to
# column proportions (margin = 2), so each party column sums to 1.
prop.table(
  table(DATA$Sanderfavorite, DATA$Twoparties),
  margin = 2
)

##                       
##                          Democrat Repubilcan
##   Don't Know           0.03827362 0.04264171
##   Skipped              0.01425081 0.01352054
##   Somewhat favorable   0.37662866 0.07748310
##   Somewhat unfavorable 0.10097720 0.14768591
##   very favorable       0.41449511 0.02028081
##   very unfavorable     0.05537459 0.69838794

Sanders vs TwoParties graph:

# Stacked bar chart: share of each Sanders favorability answer within a party.
# Rows with a missing party are dropped as well as rows with a missing answer;
# otherwise the chart shows an extra "NA" party bar that the proportion table
# and the chi-squared test do not include.
DATA %>%
  filter(!is.na(Sanderfavorite), !is.na(Twoparties)) %>%
  group_by(Twoparties, Sanderfavorite) %>%
  # Keep the Twoparties grouping so `percent` is computed within each party.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = percent, fill = Sanderfavorite))

## `summarise()` regrouping output by 'Twoparties' (override with `.groups` argument)

Sanders vs TwoParties chisq.test:

# Pearson chi-squared test of independence between Sanders favorability and
# party; the two vectors are cross-tabulated by chisq.test() itself.
chisq.test(
  x = DATA$Sanderfavorite,
  y = DATA$Twoparties
)

## 
##  Pearson's Chi-squared test
## 
## data:  DATA$Sanderfavorite and DATA$Twoparties
## X-squared = 2428.1, df = 5, p-value < 2.2e-16

There is a statistically significant relationship between TwoParties and Sanderfavorite.

Table: Biden vs TwoParties

# Cross-tabulate Biden favorability by party and convert the counts to
# column proportions (margin = 2), so each party column sums to 1.
prop.table(
  table(DATA$Bidenfavorite, DATA$Twoparties),
  margin = 2
)

##                       
##                          Democrat Repubilcan
##   Don't know           0.04030945 0.04524181
##   Skipped              0.01262215 0.01144046
##   somewhat favorable   0.24063518 0.10452418
##   somewhat unfavorable 0.03542345 0.23348934
##   very favorable       0.64372964 0.03848154
##   very unfavorable     0.02728013 0.56682267

Biden vs Twoparties graph

# Stacked bar chart: share of each Biden favorability answer within a party.
# Rows with a missing party are dropped as well as rows with a missing answer;
# otherwise the chart shows an extra "NA" party bar that the proportion table
# and the chi-squared test do not include.
DATA %>%
  filter(!is.na(Bidenfavorite), !is.na(Twoparties)) %>%
  group_by(Twoparties, Bidenfavorite) %>%
  # Keep the Twoparties grouping so `percent` is computed within each party.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = percent, fill = Bidenfavorite))

## `summarise()` regrouping output by 'Twoparties' (override with `.groups` argument)

Chisq.test Biden vs Twoparties

# Pearson chi-squared test of independence between Biden favorability and
# party; the two vectors are cross-tabulated by chisq.test() itself.
chisq.test(
  x = DATA$Bidenfavorite,
  y = DATA$Twoparties
)

## 
##  Pearson's Chi-squared test
## 
## data:  DATA$Bidenfavorite and DATA$Twoparties
## X-squared = 2690.6, df = 5, p-value < 2.2e-16

There is a statistically significant relationship between TwoParties and Bidenfavorite.

CONTINUOUS DATA:

# Bar chart of the mean vote-type code per party, stored in `data_tsmart`
# (run `data_tsmart` or print(data_tsmart) to draw it).
# The original mapped x to `mean`, which is the base function rather than a
# column, and used geom_bar(), which counts rows; the summarised mean lives in
# AVG_ft_tsmart and has to be drawn with geom_col().
# Rows with a missing party are dropped so the chart compares only the two
# labelled parties.
data_tsmart <- DATA %>%
  filter(!is.na(Twoparties)) %>%
  group_by(Twoparties) %>%
  summarize(
    AVG_ft_tsmart = mean(tsmart_P2018_vote_type_2019, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = AVG_ft_tsmart, fill = Twoparties))

## `summarise()` ungrouping output (override with `.groups` argument)

# Distribution of the vote-type code across all respondents.
# The original ran geom_histogram() on the summarised table, so every distinct
# code was counted once and the computed n/percent were never drawn. Plot the
# computed share directly, one bar per code.
DATA %>%
  group_by(tsmart_P2018_vote_type_2019) %>%
  summarize(n = n(), .groups = "drop") %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  # factor() gives each code its own evenly spaced bar.
  geom_col(aes(x = factor(tsmart_P2018_vote_type_2019), y = percent)) +
  labs(x = "tsmart_P2018_vote_type_2019")

## `summarise()` ungrouping output (override with `.groups` argument)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the vote-type code, one panel per party value.
ggplot(DATA, aes(x = tsmart_P2018_vote_type_2019)) +
  geom_histogram() +
  facet_wrap(vars(Twoparties))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

T.test:

# Welch two-sample t-test comparing the mean vote-type code between the two
# party groups (rows with a missing party are not part of either group).
# NOTE(review): the outcome looks like a coded category rather than a measured
# quantity (the value 99 appears in the data) - confirm that a comparison of
# means is meaningful here.
t.test(
  formula = tsmart_P2018_vote_type_2019 ~ Twoparties,
  data = DATA
)

## 
##  Welch Two Sample t-test
## 
## data:  tsmart_P2018_vote_type_2019 by Twoparties
## t = -0.011544, df = 4126.7, p-value = 0.9908
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.862555  2.829041
## sample estimates:
##   mean in group Democrat mean in group Repubilcan 
##                 53.37378                 53.39054

There is no statistically significant difference between Democrats and Republicans in their mean tsmart_P2018_vote_type_2019 (t = -0.012, p-value = 0.9908); the two group means, 53.37 and 53.39, are almost identical.

Conclusion: There wasn’t a large difference between the two candidates, Sanders and Biden. The lead Biden has over Sanders wasn’t as large as one might think. In this research I wanted to discover how the two candidates in the 2019 primaries differ, but I found no difference between them.