library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
## Warning: package 'readr' was built under R version 3.6.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
# Import the 2019 voter survey export.
# Left to type guessing, readr reads `weight_18_24_2018` as logical and turns
# every real weight in that column into NA (reported as parsing failures that
# expect 1/0/T/F but find decimals). Declaring it as double keeps the weights;
# all other columns are still guessed.
# NOTE(review): the absolute path only works on the original author's machine.
data <- read_csv(
  "/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv",
  col_types = cols(weight_18_24_2018 = col_double())
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## weight_18_24_2018 = col_logical(),
## izip_2019 = col_character(),
## housevote_other_2019 = col_character(),
## senatevote_other_2019 = col_character(),
## senatevote2_other_2019 = col_character(),
## SenCand1Name_2019 = col_character(),
## SenCand1Party_2019 = col_character(),
## SenCand2Name_2019 = col_character(),
## SenCand2Party_2019 = col_character(),
## SenCand3Name_2019 = col_character(),
## SenCand3Party_2019 = col_character(),
## SenCand1Name2_2019 = col_character(),
## SenCand1Party2_2019 = col_character(),
## SenCand2Name2_2019 = col_character(),
## SenCand2Party2_2019 = col_character(),
## SenCand3Name2_2019 = col_character(),
## SenCand3Party2_2019 = col_character(),
## governorvote_other_2019 = col_character(),
## GovCand1Name_2019 = col_character(),
## GovCand1Party_2019 = col_character()
## # ... with 108 more columns
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: 800 parsing failures.
## row col expected actual file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '/Users/rebeccagibble/Downloads/Voter Data 2019 (1).csv'
## .... ................. .................. ................ ........................................................
## See problems(...) for more details.
# Preview the first six rows of the imported survey data.
head(data, n = 6L)
## # A tibble: 6 x 1,282
## weight_2016 weight_2017 weight_panel_20… weight_latino_2… weight_18_24_20…
## <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 0.358 0.438 0.503 NA NA
## 2 0.563 0.366 0.389 NA NA
## 3 0.552 0.550 0.684 NA NA
## 4 0.208 NA NA NA NA
## 5 0.334 0.346 0.322 NA NA
## 6 0.207 0.148 0.594 NA NA
## # … with 1,277 more variables: weight_overall_2018 <dbl>, weight_2019 <dbl>,
## # weight1_2018 <dbl>, weight1_2019 <dbl>, weight2_2019 <dbl>,
## # weight3_2019 <dbl>, cassfullcd <dbl>, vote2020_2019 <dbl>,
## # trumpapp_2019 <dbl>, fav_trump_2019 <dbl>, fav_obama_2019 <dbl>,
## # fav_hrc_2019 <dbl>, fav_sanders_2019 <dbl>, fav_putin_2019 <dbl>,
## # fav_schumer_2019 <dbl>, fav_pelosi_2019 <dbl>, fav_comey_2019 <dbl>,
## # fav_mueller_2019 <dbl>, fav_mcconnell_2019 <dbl>, fav_kavanaugh_2019 <dbl>,
## # fav_biden_2019 <dbl>, fav_warren_2019 <dbl>, fav_harris_2019 <dbl>,
## # fav_gillibrand_2019 <dbl>, fav_patrick_2019 <dbl>, fav_booker_2019 <dbl>,
## # fav_garcetti_2019 <dbl>, fav_klobuchar_2019 <dbl>, fav_gorsuch_2019 <dbl>,
## # fav_kasich_2019 <dbl>, fav_haley_2019 <dbl>, fav_bloomberg_2019 <dbl>,
## # fav_holder_2019 <dbl>, fav_avenatti_2019 <dbl>, fav_castro_2019 <dbl>,
## # fav_landrieu_2019 <dbl>, fav_orourke_2019 <dbl>,
## # fav_hickenlooper_2019 <dbl>, fav_pence_2019 <dbl>, add_confirm_2019 <dbl>,
## # izip_2019 <chr>, votereg_2019 <dbl>, votereg_f_2019 <dbl>,
## # regzip_2019 <dbl>, region_2019 <dbl>, turnout18post_2019 <dbl>,
## # tsmart_G2018_2019 <dbl>, tsmart_G2018_vote_type_2019 <dbl>,
## # tsmart_P2018_2019 <dbl>, tsmart_P2018_party_2019 <dbl>,
## # tsmart_P2018_vote_type_2019 <dbl>, housevote_2019 <dbl>,
## # housevote_other_2019 <chr>, senatevote_2019 <dbl>,
## # senatevote_other_2019 <chr>, senatevote2_2019 <dbl>,
## # senatevote2_other_2019 <chr>, SenCand1Name_2019 <chr>,
## # SenCand1Party_2019 <chr>, SenCand2Name_2019 <chr>,
## # SenCand2Party_2019 <chr>, SenCand3Name_2019 <chr>,
## # SenCand3Party_2019 <chr>, SenCand1Name2_2019 <chr>,
## # SenCand1Party2_2019 <chr>, SenCand2Name2_2019 <chr>,
## # SenCand2Party2_2019 <chr>, SenCand3Name2_2019 <chr>,
## # SenCand3Party2_2019 <chr>, governorvote_2019 <dbl>,
## # governorvote_other_2019 <chr>, GovCand1Name_2019 <chr>,
## # GovCand1Party_2019 <chr>, GovCand2Name_2019 <chr>,
## # GovCand2Party_2019 <chr>, GovCand3Name_2019 <chr>,
## # GovCand3Party_2019 <chr>, inst_court_2019 <dbl>, inst_media_2019 <dbl>,
## # inst_congress_2019 <dbl>, inst_justice_2019 <dbl>, inst_FBI_2019 <dbl>,
## # inst_military_2019 <dbl>, inst_church_2019 <dbl>, inst_business_2019 <dbl>,
## # Democrats_2019 <dbl>, Republicans_2019 <dbl>, Men_2019 <dbl>,
## # Women_2019 <dbl>, wm_2019 <dbl>, ww_2019 <dbl>, bm_2019 <dbl>,
## # bw_2019 <dbl>, hm_2019 <dbl>, hw_2019 <dbl>, rwm_2019 <dbl>,
## # rww_2019 <dbl>, rbm_2019 <dbl>, rbw_2019 <dbl>, pwm_2019 <dbl>, …
# Recode the numeric 2019 survey answers into readable labels and keep only
# the columns used in the education comparison:
#   imiss_y_2019 -> GenderEquality    (codes 1-4, importance labels)
#   sexism1_2019 -> GenderRoles       (codes 1-4, agreement labels)
#   sexism2_2019 -> ModernSexism      (codes 1-4, agreement labels)
#   Women_2019   -> FeelingAboutWomen (values above 100 become NA;
#                                      presumably non-answer codes, TODO confirm)
# Any code other than 1-4, including NA, is recoded to NA.
newdata <- data %>%
  mutate(
    GenderEquality = case_when(
      imiss_y_2019 == 1 ~ "Very Important",
      imiss_y_2019 == 2 ~ "Somewhat Important",
      imiss_y_2019 == 3 ~ "Not very Important",
      imiss_y_2019 == 4 ~ "Unimportant",
      TRUE ~ NA_character_
    ),
    GenderRoles = case_when(
      sexism1_2019 == 1 ~ "Strongly Agree",
      sexism1_2019 == 2 ~ "Somewhat Agree",
      sexism1_2019 == 3 ~ "Somewhat Disagree",
      sexism1_2019 == 4 ~ "Strongly Disagree",
      TRUE ~ NA_character_
    ),
    ModernSexism = case_when(
      sexism2_2019 == 1 ~ "Strongly Agree",
      sexism2_2019 == 2 ~ "Somewhat Agree",
      sexism2_2019 == 3 ~ "Somewhat Disagree",
      sexism2_2019 == 4 ~ "Strongly Disagree",
      TRUE ~ NA_character_
    ),
    FeelingAboutWomen = ifelse(Women_2019 > 100, NA, Women_2019)
  ) %>%
  select(
    GenderEquality, GenderRoles, ModernSexism, FeelingAboutWomen, educ_2019
  )
# Preview the first six rows of the recoded attitude columns.
head(newdata, n = 6L)
## # A tibble: 6 x 5
## GenderEquality GenderRoles ModernSexism FeelingAboutWom… educ_2019
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Very Important Strongly Disagr… Strongly Disagr… 80 5
## 2 <NA> <NA> <NA> NA NA
## 3 Very Important Strongly Disagr… Somewhat Disagr… 71 2
## 4 Not very Importa… Strongly Disagr… Strongly Agree 10 3
## 5 Somewhat Importa… Strongly Disagr… Somewhat Disagr… 95 5
## 6 Very Important Strongly Disagr… Strongly Disagr… 100 4
# Restrict the sample to the two education codes being compared and replace
# the numeric code with a label (2 = high school graduate, 5 = college
# graduate).
# `educ_2019` is numeric, so it is matched against numeric codes rather than
# strings; this avoids relying on implicit number-to-character coercion.
newdata2 <- newdata %>%
  filter(educ_2019 %in% c(2, 5)) %>%
  mutate(
    EducationLevel = case_when(
      educ_2019 == 2 ~ "High school graduate",
      educ_2019 == 5 ~ "College graduate"
    )
  ) %>%
  select(
    GenderEquality, GenderRoles, ModernSexism, FeelingAboutWomen,
    EducationLevel
  )
# Preview the first six rows of the two-group comparison data.
head(newdata2, n = 6L)
## # A tibble: 6 x 5
## GenderEquality GenderRoles ModernSexism FeelingAboutWom… EducationLevel
## <chr> <chr> <chr> <dbl> <chr>
## 1 Very Important Strongly Dis… Strongly Disa… 80 College gradua…
## 2 Very Important Strongly Dis… Somewhat Disa… 71 High school gr…
## 3 Somewhat Import… Strongly Dis… Somewhat Disa… 95 College gradua…
## 4 Very Important Strongly Dis… Strongly Disa… 99 College gradua…
## 5 Very Important Strongly Dis… Strongly Disa… 82 College gradua…
## 6 Very Important Strongly Dis… Strongly Disa… 99 College gradua…
# Column proportions: distribution of GenderEquality answers within each
# education level (each column sums to 1).
prop.table(
  table(newdata2$GenderEquality, newdata2$EducationLevel),
  margin = 2
)
##
## College graduate High school graduate
## Not very Important 0.1607247 0.1713198
## Somewhat Important 0.2916423 0.3362944
## Unimportant 0.1455289 0.1472081
## Very Important 0.4021040 0.3451777
# Stacked bar chart: share of each GenderEquality answer within each education
# level, excluding respondents with no answer.
# `.groups = "drop_last"` keeps the data grouped by EducationLevel so that
# `sum(n)` is the per-education total; this makes explicit what the percentage
# relies on instead of depending on summarise()'s default regrouping.
newdata2 %>%
  filter(!is.na(GenderEquality)) %>%
  group_by(EducationLevel, GenderEquality) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = EducationLevel, y = percent, fill = GenderEquality)) +
  theme_minimal()
## `summarise()` regrouping output by 'EducationLevel' (override with `.groups` argument)
# Pearson chi-squared test of independence between the GenderEquality answer
# and education level.
chisq.test(
  x = newdata2$GenderEquality,
  y = newdata2$EducationLevel
)
##
## Pearson's Chi-squared test
##
## data: newdata2$GenderEquality and newdata2$EducationLevel
## X-squared = 12.889, df = 3, p-value = 0.004883
# Column proportions: distribution of GenderRoles answers within each
# education level (each column sums to 1).
prop.table(
  table(newdata2$GenderRoles, newdata2$EducationLevel),
  margin = 2
)
##
## College graduate High school graduate
## Somewhat Agee 0.10599884 0.16379860
## Somewhat Disagree 0.22947001 0.29063098
## Strongly Agree 0.03669190 0.07329509
## Strongly Disagree 0.62783925 0.47227533
# Stacked bar chart: share of each GenderRoles answer within each education
# level, excluding respondents with no answer.
# `.groups = "drop_last"` keeps the data grouped by EducationLevel so that
# `sum(n)` is the per-education total; this makes explicit what the percentage
# relies on instead of depending on summarise()'s default regrouping.
newdata2 %>%
  filter(!is.na(GenderRoles)) %>%
  group_by(EducationLevel, GenderRoles) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = EducationLevel, y = percent, fill = GenderRoles)) +
  theme_minimal()
## `summarise()` regrouping output by 'EducationLevel' (override with `.groups` argument)
# Pearson chi-squared test of independence between the GenderRoles answer and
# education level.
chisq.test(
  x = newdata2$GenderRoles,
  y = newdata2$EducationLevel
)
##
## Pearson's Chi-squared test
##
## data: newdata2$GenderRoles and newdata2$EducationLevel
## X-squared = 88.475, df = 3, p-value < 2.2e-16
# Column proportions: distribution of ModernSexism answers within each
# education level (each column sums to 1).
prop.table(
  table(newdata2$ModernSexism, newdata2$EducationLevel),
  margin = 2
)
##
## College graduate High school graduate
## Somewhat Agree 0.2216405 0.2305236
## Somewhat Disagree 0.2082606 0.2688378
## Strongly Agree 0.1436882 0.1819923
## Strongly Disagree 0.4264107 0.3186462
# Stacked bar chart: share of each ModernSexism answer within each education
# level, excluding respondents with no answer.
# `.groups = "drop_last"` keeps the data grouped by EducationLevel so that
# `sum(n)` is the per-education total; this makes explicit what the percentage
# relies on instead of depending on summarise()'s default regrouping.
newdata2 %>%
  filter(!is.na(ModernSexism)) %>%
  group_by(EducationLevel, ModernSexism) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = EducationLevel, y = percent, fill = ModernSexism)) +
  theme_minimal()
## `summarise()` regrouping output by 'EducationLevel' (override with `.groups` argument)
# Pearson chi-squared test of independence between the ModernSexism answer and
# education level.
chisq.test(
  x = newdata2$ModernSexism,
  y = newdata2$EducationLevel
)
##
## Pearson's Chi-squared test
##
## data: newdata2$ModernSexism and newdata2$EducationLevel
## X-squared = 45.766, df = 3, p-value = 6.359e-10
# Average FeelingAboutWomen score per education level; missing scores are
# ignored when averaging.
newdata3 <- newdata2 %>%
  filter(
    EducationLevel %in% c("High school graduate", "College graduate")
  ) %>%
  group_by(EducationLevel) %>%
  summarise(
    Avg_FT_Women = mean(FeelingAboutWomen, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Display the per-education-level averages computed above.
newdata3
## # A tibble: 2 x 2
## EducationLevel Avg_FT_Women
## <chr> <dbl>
## 1 College graduate 76.8
## 2 High school graduate 76.9
# Bar chart of the average FeelingAboutWomen score for each education level;
# bar fill is mapped to the same average.
ggplot(
  data = newdata3,
  mapping = aes(x = EducationLevel, y = Avg_FT_Women, fill = Avg_FT_Women)
) +
  geom_col()
# Histogram of FeelingAboutWomen scores, one panel per education level.
ggplot(data = newdata2, mapping = aes(x = FeelingAboutWomen)) +
  geom_histogram() +
  facet_wrap(~EducationLevel)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 172 rows containing non-finite values (stat_bin).
# Simulated sampling distributions of the mean FeelingAboutWomen score for
# each education level: 10,000 random samples of 40 respondents per group.
#
# Each group is sampled from its own respondents, not from the pooled
# `newdata2` column; sampling the pooled column would make the two
# distributions estimates of the same population and hide any group
# difference.
college_rows <- newdata2 %>%
  filter(EducationLevel == "College graduate")
highschool_rows <- newdata2 %>%
  filter(EducationLevel == "High school graduate")

# sample() draws without replacement. A draw may contain NA scores, which are
# dropped before averaging, so some means rest on fewer than 40 values.
# NOTE(review): no seed is set, so the simulated means differ between runs.
college_data <- data.frame(
  mean = replicate(
    10000,
    mean(sample(college_rows$FeelingAboutWomen, 40), na.rm = TRUE)
  )
)
highschool_data <- data.frame(
  mean = replicate(
    10000,
    mean(sample(highschool_rows$FeelingAboutWomen, 40), na.rm = TRUE)
  )
)

# Aliases kept so any code referring to `data1` / `data2` continues to work.
data1 <- college_data
data2 <- highschool_data
# Overlay the two simulated distributions of sample means:
# red = college_data, green = highschool_data, both semi-transparent.
ggplot(mapping = aes(x = mean)) +
  geom_histogram(data = college_data, fill = "red", alpha = 0.5) +
  geom_histogram(data = highschool_data, fill = "green", alpha = 0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Welch two-sample t-test comparing mean FeelingAboutWomen between the two
# education levels. The arguments below are t.test()'s defaults, spelled out.
t.test(
  FeelingAboutWomen ~ EducationLevel,
  data = newdata2,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: FeelingAboutWomen by EducationLevel
## t = -0.14775, df = 3105.1, p-value = 0.8826
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.578124 1.356953
## sample estimates:
## mean in group College graduate mean in group High school graduate
## 76.78163 76.89222