IMPORT DATA
library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Import the 2019 voter survey extract.
# weight_18_24_2018 is empty in the first rows readr inspects, so it was
# guessed as logical and every real weight failed to parse and became NA.
# Declaring it as double fixes that column; raising guess_max lets readr look
# at more rows before guessing the type of every other column.
data <- read_csv(
  "C:/Users/JaminS/Downloads/Voter Data 2019.csv",
  col_types = cols(weight_18_24_2018 = col_double()),
  guess_max = 10000
)
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## weight_18_24_2018 = col_logical(),
## izip_2019 = col_character(),
## housevote_other_2019 = col_character(),
## senatevote_other_2019 = col_character(),
## senatevote2_other_2019 = col_character(),
## SenCand1Name_2019 = col_character(),
## SenCand1Party_2019 = col_character(),
## SenCand2Name_2019 = col_character(),
## SenCand2Party_2019 = col_character(),
## SenCand3Name_2019 = col_character(),
## SenCand3Party_2019 = col_character(),
## SenCand1Name2_2019 = col_character(),
## SenCand1Party2_2019 = col_character(),
## SenCand2Name2_2019 = col_character(),
## SenCand2Party2_2019 = col_character(),
## SenCand3Name2_2019 = col_character(),
## SenCand3Party2_2019 = col_character(),
## governorvote_other_2019 = col_character(),
## GovCand1Name_2019 = col_character(),
## GovCand1Party_2019 = col_character()
## # ... with 108 more columns
## )
## i Use `spec()` for the full column specifications.
## Warning: 800 parsing failures.
## row col expected actual file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 'C:/Users/JaminS/Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ ...............................................
## See problems(...) for more details.
# Preview the first rows of the imported table as a quick sanity check.
data %>%
  head()
## # A tibble: 6 x 1,282
## weight_2016 weight_2017 weight_panel_20~ weight_latino_2~ weight_18_24_20~
## <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 0.358 0.438 0.503 NA NA
## 2 0.563 0.366 0.389 NA NA
## 3 0.552 0.550 0.684 NA NA
## 4 0.208 NA NA NA NA
## 5 0.334 0.346 0.322 NA NA
## 6 0.207 0.148 0.594 NA NA
## # ... with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## # weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## # weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## # trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## # fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## # fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## # fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## # fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## # fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## # fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## # fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## # fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## # fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## # fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## # izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## # regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## # tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## # tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## # tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## # housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## # senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## # senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## # SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## # SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## # SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## # SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## # SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## # SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## # governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## # GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## # GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## # GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## # inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## # inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## # Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## # Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## # bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## # rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, ...
# Show only the four survey variables that the analysis below relies on.
select(
  data,
  fav_sanders_2019,
  fav_biden_2019,
  tsmart_P2018_vote_type_2019,
  pid3_2019
)
## # A tibble: 9,548 x 4
## fav_sanders_2019 fav_biden_2019 tsmart_P2018_vote_type_2019 pid3_2019
## <dbl> <dbl> <dbl> <dbl>
## 1 1 1 3 1
## 2 NA NA 99 NA
## 3 2 1 1 3
## 4 4 1 99 3
## 5 3 2 99 2
## 6 3 1 3 1
## 7 3 1 5 1
## 8 NA NA 99 NA
## 9 NA NA 99 NA
## 10 2 3 5 1
## # ... with 9,538 more rows
Introduction:
This research examines how Democrats and Republicans rated two candidates, Biden and Sanders, in 2019. My continuous variable is tsmart_P2018_vote_type_2019. fav_sanders_2019 and fav_biden_2019 are the categorical variables, and pid3_2019 (party identification) defines the two respondent groups.
# Translate a numeric favorability code into a text label.
# One shared mapping is used for both candidates so their categories are
# spelled identically. Codes not listed (and NA) become NA.
# NOTE(review): code 5 is labelled "Very unfavorable" exactly as code 4 is,
# as in the original recode -- confirm against the survey codebook.
label_favorability <- function(code) {
  case_when(
    code == 1 ~ "Very favorable",
    code == 2 ~ "Somewhat favorable",
    code == 3 ~ "Somewhat unfavorable",
    code %in% c(4, 5) ~ "Very unfavorable",
    code == 8 ~ "Don't know",
    code == 98 ~ "Skipped",
    TRUE ~ NA_character_
  )
}

# Build the analysis table: keep the four variables of interest, attach
# readable labels, and retain the raw vote-type code used by later chunks.
DATA <- data %>%
  select(
    fav_sanders_2019,
    fav_biden_2019,
    tsmart_P2018_vote_type_2019,
    pid3_2019
  ) %>%
  mutate(
    Sanderfavorite = label_favorability(fav_sanders_2019),
    Bidenfavorite = label_favorability(fav_biden_2019),
    # Vote-type labels are carried over unchanged; any code outside 1-5
    # (e.g. the 99 visible in the data preview) becomes NA.
    # NOTE(review): verify these labels against the codebook.
    tsmart = case_when(
      tsmart_P2018_vote_type_2019 == 1 ~ "poll",
      tsmart_P2018_vote_type_2019 == 2 ~ "Early",
      tsmart_P2018_vote_type_2019 == 3 ~ "Absentee",
      tsmart_P2018_vote_type_2019 == 4 ~ "Mail",
      tsmart_P2018_vote_type_2019 == 5 ~ "Yes",
      TRUE ~ NA_character_
    ),
    # Only the two major parties are compared; every other pid3 value is NA.
    Twoparties = case_when(
      pid3_2019 == 1 ~ "Democrat",
      pid3_2019 == 2 ~ "Republican",
      TRUE ~ NA_character_
    )
  ) %>%
  # `tsmart` was previously computed and then discarded; it is kept here
  # alongside the raw code so the labelled version is available too.
  select(
    Sanderfavorite,
    Bidenfavorite,
    tsmart_P2018_vote_type_2019,
    tsmart,
    Twoparties
  )
DATA TABLE: Sander vs TwoParties
# Cross-tabulate Sanders ratings by party and convert each party column to
# proportions (margin = 2 normalises within columns).
prop.table(
  table(DATA$Sanderfavorite, DATA$Twoparties),
  margin = 2
)
##
## Democrat Repubilcan
## Don't Know 0.03827362 0.04264171
## Skipped 0.01425081 0.01352054
## Somewhat favorable 0.37662866 0.07748310
## Somewhat unfavorable 0.10097720 0.14768591
## very favorable 0.41449511 0.02028081
## very unfavorable 0.05537459 0.69838794
sander vs TwoParties graph:
# Stacked bar chart: share of each Sanders rating within each party.
DATA %>%
  # Drop respondents with no party label as well as no rating; otherwise they
  # are drawn as an extra "NA" bar that the table and chi-square test omit.
  filter(!is.na(Sanderfavorite), !is.na(Twoparties)) %>%
  group_by(Twoparties, Sanderfavorite) %>%
  # Keep the result grouped by party so sum(n) below is a per-party total.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = percent, fill = Sanderfavorite))
## `summarise()` regrouping output by 'Twoparties' (override with `.groups` argument)
Sander vs TwoParties chisq.test:
# Pearson chi-square test of independence: Sanders rating vs. party.
chisq.test(
  x = DATA$Sanderfavorite,
  y = DATA$Twoparties
)
##
## Pearson's Chi-squared test
##
## data: DATA$Sanderfavorite and DATA$Twoparties
## X-squared = 2428.1, df = 5, p-value < 2.2e-16
There is a statistically significant relationship between TwoParties and Sanderfavorite.
table:Biden VS TWoParties
# Cross-tabulate Biden ratings by party and convert each party column to
# proportions (margin = 2 normalises within columns).
prop.table(
  table(DATA$Bidenfavorite, DATA$Twoparties),
  margin = 2
)
##
## Democrat Repubilcan
## Don't know 0.04030945 0.04524181
## Skipped 0.01262215 0.01144046
## somewhat favorable 0.24063518 0.10452418
## somewhat unfavorable 0.03542345 0.23348934
## very favorable 0.64372964 0.03848154
## very unfavorable 0.02728013 0.56682267
Biden vs Twoparties graph
# Stacked bar chart: share of each Biden rating within each party.
DATA %>%
  # Drop respondents with no party label as well as no rating; otherwise they
  # are drawn as an extra "NA" bar that the table and chi-square test omit.
  filter(!is.na(Bidenfavorite), !is.na(Twoparties)) %>%
  group_by(Twoparties, Bidenfavorite) %>%
  # Keep the result grouped by party so sum(n) below is a per-party total.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = percent, fill = Bidenfavorite))
## `summarise()` regrouping output by 'Twoparties' (override with `.groups` argument)
Chisq.test Biden vs Twoparties
# Pearson chi-square test of independence: Biden rating vs. party.
chisq.test(
  x = DATA$Bidenfavorite,
  y = DATA$Twoparties
)
##
## Pearson's Chi-squared test
##
## data: DATA$Bidenfavorite and DATA$Twoparties
## X-squared = 2690.6, df = 5, p-value < 2.2e-16
There is a statistically significant relationship between TwoParties and Bidenfavorite.
CONTINUOUS DATA:
# Bar chart of the mean vote-type code per party, stored in `data_tsmart`
# (print the object to render it).
# The original mapped x to `mean`, which is the base function rather than a
# column, and used geom_bar(), which counts rows. The summarised value lives
# in AVG_ft_tsmart, so it is mapped to y and drawn with geom_col().
data_tsmart <- DATA %>%
  group_by(Twoparties) %>%
  summarize(
    AVG_ft_tsmart = mean(tsmart_P2018_vote_type_2019, na.rm = TRUE)
  ) %>%
  ggplot() +
  geom_col(aes(x = Twoparties, y = AVG_ft_tsmart, fill = Twoparties))
## `summarise()` ungrouping output (override with `.groups` argument)
# Share of respondents at each vote-type code.
# The data are already reduced to one row per code, so a histogram of those
# rows would count every code exactly once and ignore n / percent. The
# computed percentage is plotted directly, with the code as a discrete axis.
DATA %>%
  group_by(tsmart_P2018_vote_type_2019) %>%
  summarize(n = n()) %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  geom_col(aes(x = factor(tsmart_P2018_vote_type_2019), y = percent))
## `summarise()` ungrouping output (override with `.groups` argument)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the raw vote-type code, one panel per party label.
ggplot(DATA, aes(x = tsmart_P2018_vote_type_2019)) +
  geom_histogram() +
  facet_wrap(vars(Twoparties))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
T.test:
# Welch two-sample t-test comparing the mean vote-type code between the two
# party groups.
t.test(
  formula = tsmart_P2018_vote_type_2019 ~ Twoparties,
  data = DATA
)
##
## Welch Two Sample t-test
##
## data: tsmart_P2018_vote_type_2019 by Twoparties
## t = -0.011544, df = 4126.7, p-value = 0.9908
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.862555 2.829041
## sample estimates:
## mean in group Democrat mean in group Repubilcan
## 53.37378 53.39054
There is no statistically significant difference between Democrats and Republicans in their mean tsmart_P2018_vote_type_2019 (p = 0.9908).
Conclusion: There wasn’t a difference between the two candidates, Sanders and Biden. Biden’s lead over Sanders wasn’t as large as one might think. In this research I wanted to explore the two candidates in the 2019 primaries and how they differ, but I found no difference between them in the primaries.