Safiya

Sociology 333

Professor Turner

10/3/2018

Assignment 5: Wrangling Data with dplyr

The variables used are the same as in Assignment #4. They are:

  1. Family Income (faminc_2016)

  2. Type of Area (urbancity_baseline)

  3. Education level (educ_baseline)

  4. Political Ideology (post_ideo5_2012)

  5. 2016 Voter Election (presvote16post_2016)

  6. Immigration reform (immi_makedifficult_baseline)

Question 1:

Does the type of area you live in affect family income?

Responses that did not meet the conditions for this study were marked as NA, R's missing-value marker (short for "not available").

The tables below show the cross-tabulation of the two variables, first as counts and then as proportions of the tabulated observations.

# Set the knitr chunk default `echo` to FALSE.
knitr::opts_chunk$set(echo = FALSE)

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the survey CSV into a tibble. The path is absolute and machine-specific,
# so it must be adjusted when the script runs on another computer.
VoterData <- read_csv(
  file = "/Users/safiesaf/Downloads/VOTER_Survey_July17_Release1-csv.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   weight_2017 = col_double(),
##   redovote2016_t_2017 = col_character(),
##   job_title_t_2017 = col_character(),
##   weight_2016 = col_double(),
##   izip_2016 = col_character(),
##   presvote16post_t_2016 = col_character(),
##   second_chance_t_2016 = col_character(),
##   race_other_2016 = col_character(),
##   healthcov_t_2016 = col_character(),
##   employ_t_2016 = col_character(),
##   pid3_t_2016 = col_character(),
##   religpew_t_2016 = col_character(),
##   votemeth16_rnd_2016 = col_character(),
##   presvote16post_rnd_2016 = col_character(),
##   vote2016_cand2_rnd_2016 = col_character(),
##   Clinton_Rubio_rnd_2016 = col_character(),
##   Clinton_Cruz_rnd_2016 = col_character(),
##   Sanders_Trump_rnd_2016 = col_character(),
##   Sanders_Rubio_rnd_2016 = col_character(),
##   second_chance_rnd_2016 = col_character()
##   # ... with 123 more columns
## )
## See spec(...) for full column specifications.
# Keep only the six study variables and give each a readable name in the same
# step: select() accepts `new_name = old_name` pairs and returns the columns
# in the order listed here.
NewVoterData <- VoterData %>%
  select(
    TypeofArea = urbancity_baseline,
    FamilyIncome = faminc_2016,
    EducationLevel = educ_baseline,
    PoliticalIdeology = post_ideo5_2012,
    VoterElection = presvote16post_2016,
    ImmigrationReform = immi_makedifficult_baseline
  )

# Preview the first rows of the trimmed data set.
head(NewVoterData)
## # A tibble: 6 x 6
##   TypeofArea FamilyIncome EducationLevel PoliticalIdeolo… VoterElection
##        <int>        <int>          <int>            <int>         <int>
## 1          2           97              5                3             1
## 2          4            7              3                4             2
## 3          1            6              2                3             1
## 4          1            8              3                3             3
## 5          2            5              5                4             2
## 6          2            4              4                1             1
## # ... with 1 more variable: ImmigrationReform <int>
# Recode the numeric area codes 1-5 into text labels. match() returns the
# position of each code within 1:5, which indexes the label vector; any other
# code, or a missing value, has no match and therefore becomes NA.
NewVoterData <- NewVoterData %>%
  mutate(
    TypeofArea = c("City", "Suburb", "Town", "Rural Area", "Other")[
      match(TypeofArea, 1:5)
    ]
  )

# Preview the recoded labels.
head(NewVoterData[["TypeofArea"]])
## [1] "Suburb"     "Rural Area" "City"       "City"       "Suburb"    
## [6] "Suburb"
# Collapse the numeric income codes into five labelled bands. case_when()
# uses the first condition that is TRUE; between() is inclusive at both ends.
# Codes outside 1-97 and missing values fall through to NA.
# NOTE(review): the band labels are kept exactly as they appear in the
# rendered tables; confirm against the survey codebook that codes 32-97 are
# all non-income responses.
NewVoterData <- NewVoterData %>%
  mutate(
    FamilyIncome = case_when(
      between(FamilyIncome, 1, 5) ~ "$0-49,000",
      between(FamilyIncome, 6, 9) ~ "$50,000:99,999",
      between(FamilyIncome, 10, 13) ~ "$100,000:249,999",
      between(FamilyIncome, 14, 31) ~ "$250,000",
      between(FamilyIncome, 32, 97) ~ "Other",
      TRUE ~ NA_character_
    )
  )

# Preview the recoded bands.
head(NewVoterData[["FamilyIncome"]])
## [1] "Other"          "$50,000:99,999" "$50,000:99,999" "$50,000:99,999"
## [5] "$0-49,000"      "$0-49,000"
# Cross-tabulate area type (rows) against income band (columns) as counts.
table(NewVoterData[["TypeofArea"]], NewVoterData[["FamilyIncome"]])
##             
##              $0-49,000 $100,000:249,999 $250,000 $50,000:99,999 Other
##   City             872              388       49            649   301
##   Other             15               11        1             13    11
##   Rural Area       613              200       18            467   185
##   Suburb           841              739       82            944   396
##   Town             410              187       10            367   167
# Store the area-by-income counts, then express every cell as a share of the
# grand total (with no margin, prop.table() divides by the sum of all cells).
NewVoterFamIncTable <- table(
  NewVoterData[["TypeofArea"]],
  NewVoterData[["FamilyIncome"]]
)

prop.table(NewVoterFamIncTable, margin = NULL)
##             
##                 $0-49,000 $100,000:249,999     $250,000 $50,000:99,999
##   City       0.1098790323     0.0488911290 0.0061743952   0.0817792339
##   Other      0.0018901210     0.0013860887 0.0001260081   0.0016381048
##   Rural Area 0.0772429435     0.0252016129 0.0022681452   0.0588457661
##   Suburb     0.1059727823     0.0931199597 0.0103326613   0.1189516129
##   Town       0.0516633065     0.0235635081 0.0012600806   0.0462449597
##             
##                     Other
##   City       0.0379284274
##   Other      0.0013860887
##   Rural Area 0.0233114919
##   Suburb     0.0498991935
##   Town       0.0210433468

Question 2:

Does education level have an impact on political ideology?

Responses that did not meet the conditions for this study were marked as NA, R's missing-value marker (short for "not available").

The tables below show the cross-tabulation of the two variables, first as counts and then as proportions of the tabulated observations.

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
## [1] "4 Year"       "Some College" "HS Graduate"  "Some College"
## [5] "4 Year"       "2 Year"
## [1] "Moderate"     "Conservative" "Moderate"     "Moderate"    
## [5] "Conservative" "Very Liberal"
##               
##                Conservative Liberal Moderate Very Conservative
##   2 Year                222     123      299                75
##   4 Year                495     376      690               182
##   HS Graduate           599     263      650               241
##   No HS                  48      19       45                23
##   Post Grad             276     269      422               100
##   Some College          492     336      661               183
##               
##                Very Liberal
##   2 Year                 38
##   4 Year                187
##   HS Graduate            94
##   No HS                   7
##   Post Grad             116
##   Some College          133
##               
##                Conservative      Liberal     Moderate Very Conservative
##   2 Year       0.0289665971 0.0160490605 0.0390135699      0.0097860125
##   4 Year       0.0645876827 0.0490605428 0.0900313152      0.0237473904
##   HS Graduate  0.0781576200 0.0343162839 0.0848121086      0.0314457203
##   No HS        0.0062630480 0.0024791232 0.0058716075      0.0030010438
##   Post Grad    0.0360125261 0.0350991649 0.0550626305      0.0130480167
##   Some College 0.0641962422 0.0438413361 0.0862473904      0.0238778706
##               
##                Very Liberal
##   2 Year       0.0049582463
##   4 Year       0.0243997912
##   HS Graduate  0.0122651357
##   No HS        0.0009133612
##   Post Grad    0.0151356994
##   Some College 0.0173538622

Question 3:

Does who you voted for in the 2016 election have an impact on views about immigration reform?

Responses that did not meet the conditions for this study were marked as NA, R's missing-value marker (short for "not available").

The tables below show the cross-tabulation of the two variables, first as counts and then as proportions of the tabulated observations.

## [1] "Hillary Clinton" "Donald Trump"    "Hillary Clinton" "Gary Johnson"   
## [5] "Donald Trump"    "Hillary Clinton"
## [1] "Slightly easier" "No change"       "Much easier"     "Much easier"    
## [5] "Slightly easier" "Slightly harder"
##                  
##                   Much easier Much harder No change Slightly easier
##   Donald Trump            203        1091       879             501
##   Evan McMullin             5           2         5               8
##   Gary Johnson             37          45        42              55
##   Hillary Clinton         406         546       847             766
##   Jill Stein               19          17        21              25
##                  
##                   Slightly harder
##   Donald Trump                640
##   Evan McMullin                 2
##   Gary Johnson                 40
##   Hillary Clinton             640
##   Jill Stein                   17
##                  
##                    Much easier  Much harder    No change Slightly easier
##   Donald Trump    0.0295961510 0.1590610876 0.1281527920    0.0730427176
##   Evan McMullin   0.0007289692 0.0002915877 0.0007289692    0.0011663508
##   Gary Johnson    0.0053943724 0.0065607231 0.0061233416    0.0080186616
##   Hillary Clinton 0.0591923021 0.0796034407 0.1234873888    0.1116780872
##   Jill Stein      0.0027700831 0.0024784954 0.0030616708    0.0036448462
##                  
##                   Slightly harder
##   Donald Trump       0.0933080624
##   Evan McMullin      0.0002915877
##   Gary Johnson       0.0058317539
##   Hillary Clinton    0.0933080624
##   Jill Stein         0.0024784954