#install.packages("readr")
library(readr)
library(knitr)
# Read the survey extract into a tibble; column types are guessed by readr.
# NOTE(review): the path is absolute and machine-specific -- confirm the
# intended location before running this script on another machine.
voterdata <- read_csv(
  file = "/Users/meiminshan/Desktop/Abbreviated Dataset Labeled(October Only)V2.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   NumChildren = col_double(),
##   ft_fem_2017 = col_double(),
##   ft_immig_2017 = col_double(),
##   ft_police_2017 = col_double(),
##   ft_dem_2017 = col_double(),
##   ft_rep_2017 = col_double(),
##   ft_evang_2017 = col_double(),
##   ft_muslim_2017 = col_double(),
##   ft_jew_2017 = col_double(),
##   ft_christ_2017 = col_double(),
##   ft_gays_2017 = col_double(),
##   ft_unions_2017 = col_double(),
##   ft_altright_2017 = col_double(),
##   ft_black_2017 = col_double(),
##   ft_white_2017 = col_double(),
##   ft_hisp_2017 = col_double()
## )
## See spec(...) for full column specifications.
# Print the first six rows of the data for a quick visual check.
head(voterdata, n = 6L)
## # A tibble: 6 x 51
##   gender race  education familyincome children region urbancity Vote2012
##   <chr>  <chr> <chr>     <chr>        <chr>    <chr>  <chr>     <chr>   
## 1 Female White 4-year    Prefer not … No       West   Suburb    Barack …
## 2 Female White Some Col… $60K-$69,999 No       West   Rural Ar… Mitt Ro…
## 3 Male   White High Sch… $50K-$59,999 No       Midwe… City      Mitt Ro…
## 4 Male   White Some Col… $70K-$79,999 No       South  City      Barack …
## 5 Male   White 4-year    $40K-$49,999 No       South  Suburb    Mitt Ro…
## 6 Female White 2-year    $30K-$39,999 No       West   Suburb    Barack …
## # … with 43 more variables: Vote2016 <chr>, TrumpSanders <chr>,
## #   PartyRegistration <chr>, PartyIdentification <chr>,
## #   PartyIdentification2 <chr>, PartyIdentification3 <chr>,
## #   NewsPublicAffairs <chr>, DemPrimary <chr>, RepPrimary <chr>,
## #   ImmigrantContributions <chr>, ImmigrantNaturalization <chr>,
## #   ImmigrationShouldBe <chr>, Abortion <chr>, GayMarriage <chr>,
## #   DeathPenalty <chr>, DeathPenaltyFreq <chr>, TaxWealthy <chr>,
## #   Healthcare <chr>, GlobWarmExist <chr>, GlobWarmingSerious <chr>,
## #   AffirmativeAction <chr>, Religion <chr>, ReligiousImportance <chr>,
## #   ChurchAttendance <chr>, PrayerFrequency <chr>, NumChildren <dbl>,
## #   areatype <chr>, GunOwnership <chr>, ft_fem_2017 <dbl>,
## #   ft_immig_2017 <dbl>, ft_police_2017 <dbl>, ft_dem_2017 <dbl>,
## #   ft_rep_2017 <dbl>, ft_evang_2017 <dbl>, ft_muslim_2017 <dbl>,
## #   ft_jew_2017 <dbl>, ft_christ_2017 <dbl>, ft_gays_2017 <dbl>,
## #   ft_unions_2017 <dbl>, ft_altright_2017 <dbl>, ft_black_2017 <dbl>,
## #   ft_white_2017 <dbl>, ft_hisp_2017 <dbl>
# Show the compact structure of the data: each column's name, type and
# leading values.
str(object = voterdata)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 8000 obs. of  51 variables:
##  $ gender                 : chr  "Female" "Female" "Male" "Male" ...
##  $ race                   : chr  "White" "White" "White" "White" ...
##  $ education              : chr  "4-year" "Some College" "High School Graduate" "Some College" ...
##  $ familyincome           : chr  "Prefer not to say" "$60K-$69,999" "$50K-$59,999" "$70K-$79,999" ...
##  $ children               : chr  "No" "No" "No" "No" ...
##  $ region                 : chr  "West" "West" "Midwest" "South" ...
##  $ urbancity              : chr  "Suburb" "Rural Area" "City" "City" ...
##  $ Vote2012               : chr  "Barack Obama" "Mitt Romney" "Mitt Romney" "Barack Obama" ...
##  $ Vote2016               : chr  "Hillary Cinton" "Donald Trump" "Hillary Cinton" "Gary Johnson" ...
##  $ TrumpSanders           : chr  "Bernie Sanders" "Donald Trump" "Bernie Sanders" "Bernie Sanders" ...
##  $ PartyRegistration      : chr  NA "Republican" NA "Decline/No Party/Independent" ...
##  $ PartyIdentification    : chr  "Democrat" "Republican" "Republican" "Independent" ...
##  $ PartyIdentification2   : chr  "Not very strong Democrat" "Strong Republican" "Strong Republican" "Independent" ...
##  $ PartyIdentification3   : chr  "Moderate" "Conservative" "Moderate" "Moderate" ...
##  $ NewsPublicAffairs      : chr  "Most of the time" "Most of the time" "Most of the time" "Most of the time" ...
##  $ DemPrimary             : chr  "Hillary Clinton" NA "Hillary Clinton" "Someone Else" ...
##  $ RepPrimary             : chr  NA "Donald Trump" NA NA ...
##  $ ImmigrantContributions : chr  "Mostly Contribute" "Mostly a Drain" "Mostly Contribute" "Mostly Contribute" ...
##  $ ImmigrantNaturalization: chr  "Favor" "Not Sure" "Favor" "Favor" ...
##  $ ImmigrationShouldBe    : chr  "Slightly Easier" "No change" "Much Easier" "Much Easier" ...
##  $ Abortion               : chr  "Legal in all cases" "Legal in some cases and Illegal in others" "Legal in all cases" "Legal in some cases and Illegal in others" ...
##  $ GayMarriage            : chr  "Favor" "Oppose" "Favor" "Favor" ...
##  $ DeathPenalty           : chr  "Oppose" "Favor" "Favor" "Favor" ...
##  $ DeathPenaltyFreq       : chr  "Too Often" "Not Often Enough" "Not Often Enough" "About Right" ...
##  $ TaxWealthy             : chr  "Favor" "Oppose" "Favor" "Favor" ...
##  $ Healthcare             : chr  "Yes" "No" "Yes" "Yes" ...
##  $ GlobWarmExist          : chr  "Definitely is happening" "Definitely not happening" "Definitely is happening" "Definitely is happening" ...
##  $ GlobWarmingSerious     : chr  "Very Serious" NA "Very Serious" "Somewhat Serious" ...
##  $ AffirmativeAction      : chr  "Favor" "Oppose" "Favor" "Favor" ...
##  $ Religion               : chr  "Roman Catholic" "Mormon" "Agnostic" "Nothing in Particular" ...
##  $ ReligiousImportance    : chr  "Somewhat Important" "Very Important" "Not at all Important" "Not at all Important" ...
##  $ ChurchAttendance       : chr  "Seldom" "More than once a week" "Seldom" "Seldom" ...
##  $ PrayerFrequency        : chr  "Once a day" "Several times a day" "Never" "A few times a month" ...
##  $ NumChildren            : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ areatype               : chr  "Suburb" "Rural Area" "City" "City" ...
##  $ GunOwnership           : chr  "No Gun in Household" "Gun in Household" "Gun in Household" "No Gun in Household" ...
##  $ ft_fem_2017            : num  99 65 74 NA 25 100 73 50 100 100 ...
##  $ ft_immig_2017          : num  95 96 77 NA 91 100 100 1 90 80 ...
##  $ ft_police_2017         : num  76 95 78 NA 94 28 24 95 60 16 ...
##  $ ft_dem_2017            : num  88 86 91 NA 22 99 53 1 90 84 ...
##  $ ft_rep_2017            : num  21 96 20 NA 83 NA 4 50 10 5 ...
##  $ ft_evang_2017          : num  50 96 2 NA 70 NA 53 50 25 6 ...
##  $ ft_muslim_2017         : num  50 61 49 NA 80 100 100 1 69 71 ...
##  $ ft_jew_2017            : num  50 100 25 NA 91 100 100 50 71 71 ...
##  $ ft_christ_2017         : num  50 98 50 NA 94 28 100 95 70 51 ...
##  $ ft_gays_2017           : num  50 82 77 NA 71 100 54 1 100 71 ...
##  $ ft_unions_2017         : num  80 62 100 NA 20 100 80 1 90 81 ...
##  $ ft_altright_2017       : num  1 50 0 NA 50 NA 4 50 0 0 ...
##  $ ft_black_2017          : num  51 98 87 NA 90 100 98 10 56 72 ...
##  $ ft_white_2017          : num  50 90 90 NA 85 50 70 50 41 70 ...
##  $ ft_hisp_2017           : num  79 95 91 NA 90 100 99 26 56 71 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   gender = col_character(),
##   ..   race = col_character(),
##   ..   education = col_character(),
##   ..   familyincome = col_character(),
##   ..   children = col_character(),
##   ..   region = col_character(),
##   ..   urbancity = col_character(),
##   ..   Vote2012 = col_character(),
##   ..   Vote2016 = col_character(),
##   ..   TrumpSanders = col_character(),
##   ..   PartyRegistration = col_character(),
##   ..   PartyIdentification = col_character(),
##   ..   PartyIdentification2 = col_character(),
##   ..   PartyIdentification3 = col_character(),
##   ..   NewsPublicAffairs = col_character(),
##   ..   DemPrimary = col_character(),
##   ..   RepPrimary = col_character(),
##   ..   ImmigrantContributions = col_character(),
##   ..   ImmigrantNaturalization = col_character(),
##   ..   ImmigrationShouldBe = col_character(),
##   ..   Abortion = col_character(),
##   ..   GayMarriage = col_character(),
##   ..   DeathPenalty = col_character(),
##   ..   DeathPenaltyFreq = col_character(),
##   ..   TaxWealthy = col_character(),
##   ..   Healthcare = col_character(),
##   ..   GlobWarmExist = col_character(),
##   ..   GlobWarmingSerious = col_character(),
##   ..   AffirmativeAction = col_character(),
##   ..   Religion = col_character(),
##   ..   ReligiousImportance = col_character(),
##   ..   ChurchAttendance = col_character(),
##   ..   PrayerFrequency = col_character(),
##   ..   NumChildren = col_double(),
##   ..   areatype = col_character(),
##   ..   GunOwnership = col_character(),
##   ..   ft_fem_2017 = col_double(),
##   ..   ft_immig_2017 = col_double(),
##   ..   ft_police_2017 = col_double(),
##   ..   ft_dem_2017 = col_double(),
##   ..   ft_rep_2017 = col_double(),
##   ..   ft_evang_2017 = col_double(),
##   ..   ft_muslim_2017 = col_double(),
##   ..   ft_jew_2017 = col_double(),
##   ..   ft_christ_2017 = col_double(),
##   ..   ft_gays_2017 = col_double(),
##   ..   ft_unions_2017 = col_double(),
##   ..   ft_altright_2017 = col_double(),
##   ..   ft_black_2017 = col_double(),
##   ..   ft_white_2017 = col_double(),
##   ..   ft_hisp_2017 = col_double()
##   .. )
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Mean of ft_fem_2017 over all rows, ignoring missing values.
summarise(
  voterdata,
  Avg_FT_feminists = mean(ft_fem_2017, na.rm = TRUE)
)
## # A tibble: 1 x 1
##   Avg_FT_feminists
##              <dbl>
## 1             52.1
library(dplyr)
# Mean of ft_fem_2017 within each education level, ignoring missing values.
summarise(
  group_by(voterdata, education),
  Avg_FT_feminists = mean(ft_fem_2017, na.rm = TRUE)
)
## # A tibble: 7 x 2
##   education            Avg_FT_feminists
##   <chr>                           <dbl>
## 1 2-year                           50.0
## 2 4-year                           54.8
## 3 High School Graduate             47.6
## 4 No High School                   43.3
## 5 Post Grad                        57.4
## 6 Some College                     52.1
## 7 <NA>                             53.4
library(ggplot2)
# Histogram of ft_fem_2017 across all rows (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_fem_2017)) +
  geom_histogram()

library(ggplot2)
# Histogram of ft_fem_2017, one panel per education level (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_fem_2017)) +
  geom_histogram() +
  facet_wrap(~education)

library(dplyr)
# Mean of ft_hisp_2017 over all rows, ignoring missing values.
summarise(
  voterdata,
  Avg_FT_hispanics = mean(ft_hisp_2017, na.rm = TRUE)
)
## # A tibble: 1 x 1
##   Avg_FT_hispanics
##              <dbl>
## 1             70.2
library(dplyr)
# Mean of ft_hisp_2017 within each gender, ignoring missing values.
summarise(
  group_by(voterdata, gender),
  Avg_FT_hispanics = mean(ft_hisp_2017, na.rm = TRUE)
)
## # A tibble: 2 x 2
##   gender Avg_FT_hispanics
##   <chr>             <dbl>
## 1 Female             71.1
## 2 Male               69.1
library(ggplot2)
# Histogram of ft_hisp_2017 across all rows (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_hisp_2017)) +
  geom_histogram()

library(ggplot2)
# Histogram of ft_hisp_2017, one panel per gender (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_hisp_2017)) +
  geom_histogram() +
  facet_wrap(~gender)

library(dplyr)
# Mean of ft_white_2017 over all rows, ignoring missing values.
summarise(
  voterdata,
  Avg_FT_whites = mean(ft_white_2017, na.rm = TRUE)
)
## # A tibble: 1 x 1
##   Avg_FT_whites
##           <dbl>
## 1          76.1
library(dplyr)
# Mean of ft_white_2017 within each family-income category, ignoring missing
# values. The summary column carries the full name Avg_FT_whites (rather than
# a truncated "Avg_FT_") so the table is self-describing and matches the
# naming used for the ungrouped mean of the same variable.
# familyincome is a character column, so the groups print in alphabetical
# order rather than in order of income.
voterdata %>%
  group_by(familyincome) %>%
  summarize(Avg_FT_whites = mean(ft_white_2017, na.rm = TRUE))
## # A tibble: 18 x 2
##    familyincome      Avg_FT_whites
##    <chr>                     <dbl>
##  1 $100K-$119,999             75.8
##  2 $10K-$19,999               75.5
##  3 $120K-$149,999             73.6
##  4 $150K or more              73.7
##  5 $150K-$199,999             73.9
##  6 $200K-$249,999             75.5
##  7 $20K-$29,999               77.1
##  8 $250K-$349,999             75.9
##  9 $30K-$39,999               77.8
## 10 $350K-$499,999             77.4
## 11 $40K-$49,999               77.2
## 12 $500K or more              77.9
## 13 $50K-$59,999               75.8
## 14 $60K-$69,999               76.6
## 15 $70K-$79,999               75.5
## 16 $80K-$99,999               76.1
## 17 Less than $10K             74.2
## 18 Prefer not to say          76.7
library(ggplot2)
# Histogram of ft_white_2017 across all rows (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_white_2017)) +
  geom_histogram()

library(ggplot2)
# Histogram of ft_white_2017, one panel per family-income category
# (default binning).
ggplot(data = voterdata, mapping = aes(x = ft_white_2017)) +
  geom_histogram() +
  facet_wrap(~familyincome)