Assignment 5: Wrangling Data with dplyr
The variables used are the same as in Assignment 4:
Family Income (faminc_2016)
Type of Area (urbancity_baseline)
Education level (educ_baseline)
Political Ideology (post_ideo5_2012)
2016 Voter Election (presvote16post_2016)
Immigration reform (immi_makedifficult_baseline)
Question 1:
Does the type of area you live in affect family income?
Responses that did not meet the conditions for this study were recoded as NA, R's marker for a missing ("not available") value.
The tables below show the cross-tabulated counts, followed by the column proportions.
# Set the knitr chunk option `echo = TRUE` as the default for all chunks.
knitr::opts_chunk$set(echo = TRUE)
# NOTE(review): `cars` is not referenced anywhere else in the visible analysis;
# this looks like leftover R Markdown template boilerplate - confirm and remove
# if it is not intended to be part of the report.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
library(readr)
library(dplyr)
# Read the survey CSV into `VoterData`.
# The path is absolute and specific to one machine, so it must be adjusted
# before this script is run anywhere else.
VoterData <- read_csv(
  file = "/Users/safiesaf/Downloads/VOTER_Survey_July17_Release1-csv.csv"
)
# Build the analysis data set from the raw survey data:
#   1. keep only the six survey items used in this assignment, under readable
#      names (column order is the order listed in select());
#   2. recode the numeric answer codes for area type and family income into
#      text labels.
# Any code outside the listed values/ranges, and any missing value, becomes NA.
NewVoterData <- VoterData %>%
  # select() renames while selecting, so no separate rename() step is needed.
  select(
    TypeofArea = urbancity_baseline,
    FamilyIncome = faminc_2016,
    EducationLevel = educ_baseline,
    PoliticalIdeology = post_ideo5_2012,
    VoterElection = presvote16post_2016,
    ImmigrationReform = immi_makedifficult_baseline
  ) %>%
  mutate(
    # case_when() uses the first condition that is TRUE for each row; rows
    # that match no condition (including NA inputs) are left as NA.
    TypeofArea = case_when(
      TypeofArea == 1 ~ "City",
      TypeofArea == 2 ~ "Suburb",
      TypeofArea == 3 ~ "Town",
      TypeofArea == 4 ~ "Rural Area",
      TypeofArea == 5 ~ "Other"
    ),
    # Income codes are collapsed into five bands.
    # NOTE(review): the labels "$0-49,000" and "$250,000" look like they were
    # meant to read "$0-49,999" and "$250,000+"; they are kept unchanged here
    # because they appear verbatim in the rendered tables - confirm.
    # NOTE(review): the 14-31 and 32-97 ranges group several codes together;
    # verify against the survey codebook that every code in each range really
    # belongs to that band.
    FamilyIncome = case_when(
      FamilyIncome >= 1 & FamilyIncome <= 5 ~ "$0-49,000",
      FamilyIncome >= 6 & FamilyIncome <= 9 ~ "$50,000-99,999",
      FamilyIncome >= 10 & FamilyIncome <= 13 ~ "$100,000-249,999",
      FamilyIncome >= 14 & FamilyIncome <= 31 ~ "$250,000",
      FamilyIncome >= 32 & FamilyIncome <= 97 ~ "Other"
    )
  )
# Cross-tabulate family income (rows) against type of area (columns).
# NA values in either column are dropped by table()'s default settings.
table(
  NewVoterData[["FamilyIncome"]],
  NewVoterData[["TypeofArea"]]
)
##
## City Other Rural Area Suburb Town
## $0-49,000 872 15 613 841 410
## $100,000-249,999 388 11 200 739 187
## $250,000 49 1 18 82 10
## $50,000-99,999 649 13 467 944 367
## Other 301 11 185 396 167
# Store the income-by-area counts, then print them as column proportions
# (margin = 2: each area-type column sums to 1).
NewVoterFamIncTable <- table(
  NewVoterData[["FamilyIncome"]],
  NewVoterData[["TypeofArea"]]
)
prop.table(NewVoterFamIncTable, margin = 2)
##
## City Other Rural Area Suburb
## $0-49,000 0.386011510 0.294117647 0.413351315 0.280146569
## $100,000-249,999 0.171757415 0.215686275 0.134861767 0.246169221
## $250,000 0.021691014 0.019607843 0.012137559 0.027315123
## $50,000-99,999 0.287295263 0.254901961 0.314902225 0.314457029
## Other 0.133244799 0.215686275 0.124747134 0.131912059
##
## Town
## $0-49,000 0.359333918
## $100,000-249,999 0.163891323
## $250,000 0.008764242
## $50,000-99,999 0.321647677
## Other 0.146362840
Question 2:
Does Education level have an impact on political ideology?
Responses that did not meet the conditions for this study were recoded as NA, R's marker for a missing ("not available") value.
The tables below show the cross-tabulated counts, followed by the column proportions.
# Recode the numeric education and political-ideology answer codes into text
# labels. Any code that is not listed, and any missing value, becomes NA.
# NewVoterData is overwritten in place, so running this block a second time on
# the already-recoded data turns both columns into NA (the labels no longer
# equal any of the numeric codes).
NewVoterData <- NewVoterData %>%
  mutate(
    # case_when() uses the first condition that is TRUE for each row; rows
    # that match no condition are left as NA.
    EducationLevel = case_when(
      EducationLevel == 1 ~ "No HS",
      EducationLevel == 2 ~ "HS Graduate",
      EducationLevel == 3 ~ "Some College",
      EducationLevel == 4 ~ "2 Year",
      EducationLevel == 5 ~ "4 Year",
      EducationLevel == 6 ~ "Post Grad"
    ),
    PoliticalIdeology = case_when(
      PoliticalIdeology == 1 ~ "Very Liberal",
      PoliticalIdeology == 2 ~ "Liberal",
      PoliticalIdeology == 3 ~ "Moderate",
      PoliticalIdeology == 4 ~ "Conservative",
      PoliticalIdeology == 5 ~ "Very Conservative"
    )
  )
# Cross-tabulate education level (rows) against political ideology (columns).
# NA values in either column are dropped by table()'s default settings.
table(
  NewVoterData[["EducationLevel"]],
  NewVoterData[["PoliticalIdeology"]]
)
##
## Conservative Liberal Moderate Very Conservative
## 2 Year 222 123 299 75
## 4 Year 495 376 690 182
## HS Graduate 599 263 650 241
## No HS 48 19 45 23
## Post Grad 276 269 422 100
## Some College 492 336 661 183
##
## Very Liberal
## 2 Year 38
## 4 Year 187
## HS Graduate 94
## No HS 7
## Post Grad 116
## Some College 133
# Store the education-by-ideology counts, then print them as column
# proportions (margin = 2: each ideology column sums to 1).
# NOTE(review): the question asks whether education affects ideology; row
# proportions (margin = 1, within each education level) may be the intended
# comparison - confirm.
EduLevlPoliIdeoTable <- table(
  NewVoterData[["EducationLevel"]],
  NewVoterData[["PoliticalIdeology"]]
)
prop.table(EduLevlPoliIdeoTable, margin = 2)
##
## Conservative Liberal Moderate Very Conservative
## 2 Year 0.10412758 0.08874459 0.10805927 0.09328358
## 4 Year 0.23217636 0.27128427 0.24936755 0.22636816
## HS Graduate 0.28095685 0.18975469 0.23491146 0.29975124
## No HS 0.02251407 0.01370851 0.01626310 0.02860697
## Post Grad 0.12945591 0.19408369 0.15251175 0.12437811
## Some College 0.23076923 0.24242424 0.23888688 0.22761194
##
## Very Liberal
## 2 Year 0.06608696
## 4 Year 0.32521739
## HS Graduate 0.16347826
## No HS 0.01217391
## Post Grad 0.20173913
## Some College 0.23130435
Question 3:
Does who you voted for in the 2016 election have an impact on views about immigration reform?
Responses that did not meet the conditions for this study were recoded as NA, R's marker for a missing ("not available") value.
The tables below show the cross-tabulated counts, followed by the column proportions.
# NOTE(review): `cars` is not referenced anywhere else in the visible analysis;
# this looks like leftover R Markdown template boilerplate - confirm and remove
# if it is not intended to be part of the report.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Recode the numeric 2016 presidential vote and immigration-reform answer
# codes into text labels. Any code that is not listed, and any missing value,
# becomes NA.
# NewVoterData is overwritten in place, so running this block a second time on
# the already-recoded data turns both columns into NA (the labels no longer
# equal any of the numeric codes).
NewVoterData <- NewVoterData %>%
  mutate(
    # case_when() uses the first condition that is TRUE for each row; rows
    # that match no condition are left as NA.
    VoterElection = case_when(
      VoterElection == 1 ~ "Hillary Clinton",
      VoterElection == 2 ~ "Donald Trump",
      VoterElection == 3 ~ "Gary Johnson",
      VoterElection == 4 ~ "Jill Stein",
      VoterElection == 5 ~ "Evan McMullin"
    ),
    ImmigrationReform = case_when(
      ImmigrationReform == 1 ~ "Much easier",
      ImmigrationReform == 2 ~ "Slightly easier",
      ImmigrationReform == 3 ~ "No change",
      ImmigrationReform == 4 ~ "Slightly harder",
      ImmigrationReform == 5 ~ "Much harder"
    )
  )
# Cross-tabulate 2016 presidential vote (rows) against immigration-reform
# preference (columns).
# NA values in either column are dropped by table()'s default settings.
table(
  NewVoterData[["VoterElection"]],
  NewVoterData[["ImmigrationReform"]]
)
##
## Much easier Much harder No change Slightly easier
## Donald Trump 203 1091 879 501
## Evan McMullin 5 2 5 8
## Gary Johnson 37 45 42 55
## Hillary Clinton 406 546 847 766
## Jill Stein 19 17 21 25
##
## Slightly harder
## Donald Trump 640
## Evan McMullin 2
## Gary Johnson 40
## Hillary Clinton 640
## Jill Stein 17
# Store the vote-by-immigration-preference counts, then print them as column
# proportions (margin = 2: each immigration-preference column sums to 1).
# NOTE(review): the question asks whether vote choice affects immigration
# views; row proportions (margin = 1, within each candidate's voters) may be
# the intended comparison - confirm.
VoteElecImmiRefTable <- table(
  NewVoterData[["VoterElection"]],
  NewVoterData[["ImmigrationReform"]]
)
prop.table(VoteElecImmiRefTable, margin = 2)
##
## Much easier Much harder No change Slightly easier
## Donald Trump 0.302985075 0.641387419 0.489966555 0.369741697
## Evan McMullin 0.007462687 0.001175779 0.002787068 0.005904059
## Gary Johnson 0.055223881 0.026455026 0.023411371 0.040590406
## Hillary Clinton 0.605970149 0.320987654 0.472129320 0.565313653
## Jill Stein 0.028358209 0.009994121 0.011705686 0.018450185
##
## Slightly harder
## Donald Trump 0.477968633
## Evan McMullin 0.001493652
## Gary Johnson 0.029873040
## Hillary Clinton 0.477968633
## Jill Stein 0.012696042