Create New Dataset for Analysis

#install.packages("readr")
#install.packages("dplyr")

library(readr) 
library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw survey export into a tibble, letting readr guess column types.
# NOTE(review): readr reports parsing failures for a few columns on this file
# (e.g. religpew_muslim_baseline, child_age*_1_baseline). None of them are used
# in the analysis below, but consider a larger `guess_max` or explicit
# `col_types` if they are ever needed.
VoterData <- read_csv(
  file = "~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   redovote2016_t_2017 = col_character(),
##   job_title_t_2017 = col_character(),
##   izip_2016 = col_character(),
##   presvote16post_t_2016 = col_character(),
##   second_chance_t_2016 = col_character(),
##   race_other_2016 = col_character(),
##   healthcov_t_2016 = col_character(),
##   employ_t_2016 = col_character(),
##   pid3_t_2016 = col_character(),
##   religpew_t_2016 = col_character(),
##   votemeth16_rnd_2016 = col_character(),
##   presvote16post_rnd_2016 = col_character(),
##   vote2016_cand2_rnd_2016 = col_character(),
##   Clinton_Rubio_rnd_2016 = col_character(),
##   Clinton_Cruz_rnd_2016 = col_character(),
##   Sanders_Trump_rnd_2016 = col_character(),
##   Sanders_Rubio_rnd_2016 = col_character(),
##   second_chance_rnd_2016 = col_character(),
##   obamaapp_rnd_2016 = col_character(),
##   fav_grid_row_rnd_2016 = col_character()
##   # ... with 121 more columns
## )
## See spec(...) for full column specifications.
## Warning: 13 parsing failures.
##  row                      col           expected actual                                                                                       file
## 1418 religpew_muslim_baseline 1/0/T/F/TRUE/FALSE     90 '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age7_1_baseline    1/0/T/F/TRUE/FALSE     6  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age8_1_baseline    1/0/T/F/TRUE/FALSE     4  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age9_1_baseline    1/0/T/F/TRUE/FALSE     2  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 2947 religpew_muslim_baseline 1/0/T/F/TRUE/FALSE     2  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## .... ........................ .................. ...... ..........................................................................................
## See problems(...) for more details.
# Build the analysis dataset: keep the eight survey items used below, give
# them readable names, and replace numeric answer codes with text labels.
NewVoterData <- VoterData %>%
  # select() renames while it selects (new_name = old_name); the column order
  # is the order listed here.
  select(
    Vote2016 = presvote16post_2016,
    Ideology = post_ideo5_2012,
    Abortion = abortview3_2016,
    Region = region_baseline,
    Religion = religpew_baseline,
    `Feelings toward police` = ft_police_2017,
    `Feelings toward Evangelical Christians` = ft_evang_2017,
    `Feelings toward gays/lesbians` = ft_gays_2017
  ) %>%
  mutate(
    # case_when() yields NA for any value that matches no branch, so missing
    # answers and codes not listed here all become NA.
    Vote2016 = case_when(
      Vote2016 == 1 ~ "Hillary Clinton",
      Vote2016 == 2 ~ "Donald Trump",
      Vote2016 == 3 ~ "Gary Johnson",
      Vote2016 == 4 ~ "Jill Stein",
      Vote2016 == 5 ~ "Evan McMullin",
      Vote2016 == 6 ~ "Other",
      Vote2016 == 7 ~ "Did not vote for President"
    ),
    Ideology = case_when(
      Ideology == 1 ~ "Very liberal",
      Ideology == 2 ~ "Liberal",
      Ideology == 3 ~ "Moderate",
      Ideology == 4 ~ "Conservative",
      Ideology == 5 ~ "Very conservative",
      Ideology == 6 ~ "Not sure"
    ),
    Abortion = case_when(
      Abortion == 1 ~ "Legal in all cases",
      Abortion == 2 ~ "Legal in some cases and illegal in others",
      Abortion == 3 ~ "Illegal in all cases",
      Abortion == 8 ~ "Not sure"
    ),
    Region = case_when(
      Region == 1 ~ "Northeast",
      Region == 2 ~ "Midwest",
      Region == 3 ~ "South",
      Region == 4 ~ "West",
      Region == 9 ~ "Not in U.S."
    ),
    Religion = case_when(
      Religion == 1 ~ "Protestant",
      Religion == 2 ~ "Roman Catholic",
      Religion == 3 ~ "Mormon",
      Religion == 4 ~ "Eastern or Greek Orthodox",
      Religion == 5 ~ "Jewish",
      Religion == 6 ~ "Muslim",
      Religion == 7 ~ "Buddhist",
      Religion == 8 ~ "Hindu",
      Religion == 9 ~ "Atheist",
      Religion == 10 ~ "Agnostic",
      Religion == 11 ~ "Nothing in Particular",
      Religion == 12 ~ "Something else"
    ),
    # Scores above 100 are blanked out so they cannot distort the means and
    # SDs computed later. NOTE(review): presumably these are non-response
    # codes on a 0-100 scale -- confirm against the survey codebook.
    `Feelings toward police` = ifelse(
      `Feelings toward police` > 100,
      NA,
      `Feelings toward police`
    ),
    `Feelings toward Evangelical Christians` = ifelse(
      `Feelings toward Evangelical Christians` > 100,
      NA,
      `Feelings toward Evangelical Christians`
    ),
    `Feelings toward gays/lesbians` = ifelse(
      `Feelings toward gays/lesbians` > 100,
      NA,
      `Feelings toward gays/lesbians`
    )
  )

Investigate Groupwise Differences in the Average and SD of the “ft” variable (1)

# Mean and standard deviation of `Feelings toward police` within each
# ideology group; respondents with no ideology label are left out.
NewVoterData %>%
  filter(!is.na(Ideology)) %>%
  group_by(Ideology) %>%
  summarise(
    `Feelings toward police mean` = mean(`Feelings toward police`, na.rm = TRUE),
    `Feelings toward police sd` = sd(`Feelings toward police`, na.rm = TRUE)
  )
## # A tibble: 6 x 3
##   Ideology          `Feelings toward police mea~ `Feelings toward police s~
##   <chr>                                    <dbl>                      <dbl>
## 1 Conservative                              86.1                       18.3
## 2 Liberal                                   65.5                       24.9
## 3 Moderate                                  73.7                       24.6
## 4 Not sure                                  72.3                       27.8
## 5 Very conservative                         87.0                       17.6
## 6 Very liberal                              59.1                       27.2

Make a Bar Chart for the table (1)

#install.packages("ggplot2")
library(ggplot2)
# Bar chart of the per-ideology mean of `Feelings toward police`, with a
# vertical line on each bar running from mean - SD to mean + SD.
NewVoterData %>%
  filter(!is.na(Ideology)) %>%
  group_by(Ideology) %>%
  summarise(
    `Feelings toward police mean` = mean(`Feelings toward police`, na.rm = TRUE),
    `Feelings toward police sd` = sd(`Feelings toward police`, na.rm = TRUE)
  ) %>%
  # x is shared by both layers, so it is mapped once at the plot level.
  ggplot(aes(x = Ideology)) +
  geom_col(aes(y = `Feelings toward police mean`, fill = Ideology)) +
  geom_segment(
    aes(
      xend = Ideology,
      y = `Feelings toward police mean` + `Feelings toward police sd`,
      yend = `Feelings toward police mean` - `Feelings toward police sd`
    )
  )

Make a Histogram to visualize the differences (1)

# Histogram of `Feelings toward police`, one panel per ideology group.
# No group_by() is needed: the split into groups comes from the `fill`
# aesthetic and facet_wrap(), not from dplyr grouping.
# geom_histogram() falls back to its default of 30 bins and drops rows whose
# score is missing, reporting both when the plot is drawn.
NewVoterData %>%
  filter(!is.na(Ideology)) %>%
  ggplot() +
  geom_histogram(aes(x = `Feelings toward police`, fill = Ideology)) +
  facet_wrap(~Ideology)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3101 rows containing non-finite values (stat_bin).

Investigate Groupwise Differences in the Average and SD of the “ft” variable (2)

# Mean and standard deviation of `Feelings toward Evangelical Christians`
# within each 2016 vote choice; respondents with no vote label are left out.
NewVoterData %>%
  filter(!is.na(Vote2016)) %>%
  group_by(Vote2016) %>%
  summarise(
    `Feelings toward Evangelical Christians mean` = mean(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    ),
    `Feelings toward Evangelical Christians sd` = sd(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    )
  )
## # A tibble: 7 x 3
##   Vote2016         `Feelings toward Evangelica~ `Feelings toward Evangelic~
##   <chr>                                   <dbl>                       <dbl>
## 1 Did not vote fo~                         54.9                        34.4
## 2 Donald Trump                             73.6                        25.3
## 3 Evan McMullin                            58.5                        32.2
## 4 Gary Johnson                             49.5                        28.3
## 5 Hillary Clinton                          34.8                        31.0
## 6 Jill Stein                               25.2                        26.5
## 7 Other                                    57.3                        34.7

Make a Bar Chart for the table (2)

# Bar chart of the per-vote-choice mean of `Feelings toward Evangelical
# Christians`, with a vertical line on each bar running from mean - SD to
# mean + SD.
NewVoterData %>%
  filter(!is.na(Vote2016)) %>%
  group_by(Vote2016) %>%
  summarise(
    `Feelings toward Evangelical Christians mean` = mean(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    ),
    `Feelings toward Evangelical Christians sd` = sd(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    )
  ) %>%
  # x is shared by both layers, so it is mapped once at the plot level.
  ggplot(aes(x = Vote2016)) +
  geom_col(
    aes(y = `Feelings toward Evangelical Christians mean`, fill = Vote2016)
  ) +
  geom_segment(
    aes(
      xend = Vote2016,
      y = `Feelings toward Evangelical Christians mean` +
        `Feelings toward Evangelical Christians sd`,
      yend = `Feelings toward Evangelical Christians mean` -
        `Feelings toward Evangelical Christians sd`
    )
  )

Make a Histogram to visualize the differences (2)

# Histogram of `Feelings toward Evangelical Christians`, one panel per 2016
# vote choice.
# No group_by() is needed: the split into groups comes from the `fill`
# aesthetic and facet_wrap(), not from dplyr grouping.
# geom_histogram() falls back to its default of 30 bins and drops rows whose
# score is missing, reporting both when the plot is drawn.
NewVoterData %>%
  filter(!is.na(Vote2016)) %>%
  ggplot() +
  geom_histogram(
    aes(x = `Feelings toward Evangelical Christians`, fill = Vote2016)
  ) +
  facet_wrap(~Vote2016)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3165 rows containing non-finite values (stat_bin).

Investigate Groupwise Differences in the Average and SD of the “ft” variable (3)

# Mean and standard deviation of `Feelings toward gays/lesbians` within each
# abortion-view group; respondents with no abortion label are left out.
NewVoterData %>%
  filter(!is.na(Abortion)) %>%
  group_by(Abortion) %>%
  summarise(
    `Feelings toward gays/lesbians mean` = mean(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    ),
    `Feelings toward gays/lesbians sd` = sd(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    )
  )
## # A tibble: 4 x 3
##   Abortion                `Feelings toward gays/le~ `Feelings toward gays/~
##   <chr>                                       <dbl>                   <dbl>
## 1 Illegal in all cases                         37.5                    31.5
## 2 Legal in all cases                           78.0                    23.7
## 3 Legal in some cases an~                      54.5                    29.9
## 4 Not sure                                     58.9                    28.6

Make a Bar Chart for the table (3)

# Bar chart of the per-abortion-view mean of `Feelings toward gays/lesbians`,
# with a vertical line on each bar running from mean - SD to mean + SD.
NewVoterData %>%
  filter(!is.na(Abortion)) %>%
  group_by(Abortion) %>%
  summarise(
    `Feelings toward gays/lesbians mean` = mean(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    ),
    `Feelings toward gays/lesbians sd` = sd(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    )
  ) %>%
  # x is shared by both layers, so it is mapped once at the plot level.
  ggplot(aes(x = Abortion)) +
  geom_col(aes(y = `Feelings toward gays/lesbians mean`, fill = Abortion)) +
  geom_segment(
    aes(
      xend = Abortion,
      y = `Feelings toward gays/lesbians mean` +
        `Feelings toward gays/lesbians sd`,
      yend = `Feelings toward gays/lesbians mean` -
        `Feelings toward gays/lesbians sd`
    )
  )

Make a Histogram to visualize the differences (3)

# Histogram of `Feelings toward gays/lesbians`, one panel per abortion view.
# No group_by() is needed: the split into groups comes from the `fill`
# aesthetic and facet_wrap(), not from dplyr grouping.
# geom_histogram() falls back to its default of 30 bins and drops rows whose
# score is missing, reporting both when the plot is drawn.
NewVoterData %>%
  filter(!is.na(Abortion)) %>%
  ggplot() +
  geom_histogram(aes(x = `Feelings toward gays/lesbians`, fill = Abortion)) +
  facet_wrap(~Abortion)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3177 rows containing non-finite values (stat_bin).