Means, Standard Deviations, Distributions

Create New Dataset for Analysis

#install.packages("readr")
#install.packages("dplyr")

library(readr) 
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Read the voter survey CSV into a tibble.
# readr guesses each column's type from the first `guess_max` rows (1000 by
# default). With the default, columns that are empty early on (for example
# religpew_muslim_baseline and child_age7_1_baseline) were guessed as logical,
# and the 13 later numeric values were reported as parsing failures and
# dropped to NA. Raising `guess_max` lets readr inspect more rows before it
# settles on a type.
# NOTE(review): 100000 is assumed to exceed the file's row count - confirm.
VoterData <- read_csv(
  "~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv",
  guess_max = 100000
)

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   redovote2016_t_2017 = col_character(),
##   job_title_t_2017 = col_character(),
##   izip_2016 = col_character(),
##   presvote16post_t_2016 = col_character(),
##   second_chance_t_2016 = col_character(),
##   race_other_2016 = col_character(),
##   healthcov_t_2016 = col_character(),
##   employ_t_2016 = col_character(),
##   pid3_t_2016 = col_character(),
##   religpew_t_2016 = col_character(),
##   votemeth16_rnd_2016 = col_character(),
##   presvote16post_rnd_2016 = col_character(),
##   vote2016_cand2_rnd_2016 = col_character(),
##   Clinton_Rubio_rnd_2016 = col_character(),
##   Clinton_Cruz_rnd_2016 = col_character(),
##   Sanders_Trump_rnd_2016 = col_character(),
##   Sanders_Rubio_rnd_2016 = col_character(),
##   second_chance_rnd_2016 = col_character(),
##   obamaapp_rnd_2016 = col_character(),
##   fav_grid_row_rnd_2016 = col_character()
##   # ... with 121 more columns
## )

## See spec(...) for full column specifications.

## Warning: 13 parsing failures.
##  row                      col           expected actual                                                                                       file
## 1418 religpew_muslim_baseline 1/0/T/F/TRUE/FALSE     90 '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age7_1_baseline    1/0/T/F/TRUE/FALSE     6  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age8_1_baseline    1/0/T/F/TRUE/FALSE     4  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 1531 child_age9_1_baseline    1/0/T/F/TRUE/FALSE     2  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## 2947 religpew_muslim_baseline 1/0/T/F/TRUE/FALSE     2  '~/Tsukasa/NY/CUNY/Class/Spring 2019/Programming for Social Research/VoterData2017(1).csv'
## .... ........................ .................. ...... ..........................................................................................
## See problems(...) for more details.

# Build the analysis dataset: keep the eight survey items used below, give
# them readable names, and replace numeric answer codes with text labels.
NewVoterData <- VoterData %>%
  # select() renames while it selects (new_name = old_name), so a separate
  # rename() step is not needed.
  select(
    Vote2016 = presvote16post_2016,
    Ideology = post_ideo5_2012,
    Abortion = abortview3_2016,
    Region = region_baseline,
    Religion = religpew_baseline,
    `Feelings toward police` = ft_police_2017,
    `Feelings toward Evangelical Christians` = ft_evang_2017,
    `Feelings toward gays/lesbians` = ft_gays_2017
  ) %>%
  mutate(
    # case_when() uses the first condition that is TRUE. Codes that are not
    # listed, and missing values, match nothing and therefore become NA.
    Vote2016 = case_when(
      Vote2016 == 1 ~ "Hillary Clinton",
      Vote2016 == 2 ~ "Donald Trump",
      Vote2016 == 3 ~ "Gary Johnson",
      Vote2016 == 4 ~ "Jill Stein",
      Vote2016 == 5 ~ "Evan McMullin",
      Vote2016 == 6 ~ "Other",
      Vote2016 == 7 ~ "Did not vote for President"
    ),
    Ideology = case_when(
      Ideology == 1 ~ "Very liberal",
      Ideology == 2 ~ "Liberal",
      Ideology == 3 ~ "Moderate",
      Ideology == 4 ~ "Conservative",
      Ideology == 5 ~ "Very conservative",
      Ideology == 6 ~ "Not sure"
    ),
    Abortion = case_when(
      Abortion == 1 ~ "Legal in all cases",
      Abortion == 2 ~ "Legal in some cases and illegal in others",
      Abortion == 3 ~ "Illegal in all cases",
      Abortion == 8 ~ "Not sure"
    ),
    Region = case_when(
      Region == 1 ~ "Northeast",
      Region == 2 ~ "Midwest",
      Region == 3 ~ "South",
      Region == 4 ~ "West",
      Region == 9 ~ "Not in U.S."
    ),
    Religion = case_when(
      Religion == 1 ~ "Protestant",
      Religion == 2 ~ "Roman Catholic",
      Religion == 3 ~ "Mormon",
      Religion == 4 ~ "Eastern or Greek Orthodox",
      Religion == 5 ~ "Jewish",
      Religion == 6 ~ "Muslim",
      Religion == 7 ~ "Buddhist",
      Religion == 8 ~ "Hindu",
      Religion == 9 ~ "Atheist",
      Religion == 10 ~ "Agnostic",
      Religion == 11 ~ "Nothing in Particular",
      Religion == 12 ~ "Something else"
    ),
    # Feeling scores above 100 are set to NA.
    # NOTE(review): presumably these are non-response codes - confirm against
    # the survey codebook.
    `Feelings toward police` = ifelse(
      `Feelings toward police` > 100, NA, `Feelings toward police`
    ),
    `Feelings toward Evangelical Christians` = ifelse(
      `Feelings toward Evangelical Christians` > 100,
      NA,
      `Feelings toward Evangelical Christians`
    ),
    `Feelings toward gays/lesbians` = ifelse(
      `Feelings toward gays/lesbians` > 100,
      NA,
      `Feelings toward gays/lesbians`
    )
  )

Investigate Groupwise Differences in the Average and SD of “ft” variable (1)

# Table (1): mean and standard deviation of the police feeling score for each
# ideology group. Respondents without an ideology label are left out, and
# missing scores are ignored inside each group.
NewVoterData %>%
  filter(!is.na(Ideology)) %>%
  group_by(Ideology) %>%
  summarize(
    `Feelings toward police mean` = mean(
      `Feelings toward police`,
      na.rm = TRUE
    ),
    `Feelings toward police sd` = sd(
      `Feelings toward police`,
      na.rm = TRUE
    )
  )

## # A tibble: 6 x 3
##   Ideology          `Feelings toward police mea~ `Feelings toward police s~
##   <chr>                                    <dbl>                      <dbl>
## 1 Conservative                              86.1                       18.3
## 2 Liberal                                   65.5                       24.9
## 3 Moderate                                  73.7                       24.6
## 4 Not sure                                  72.3                       27.8
## 5 Very conservative                         87.0                       17.6
## 6 Very liberal                              59.1                       27.2

Make a Bar Chart for the table (1)

#install.packages("ggplot2")
library(ggplot2)
# Bar chart (1): mean police feeling score per ideology group, with a vertical
# line spanning mean - SD to mean + SD on each bar.
NewVoterData %>%
  filter(!is.na(Ideology)) %>%
  # Order the groups from most liberal to most conservative ("Not sure" last).
  # As plain text they would be sorted alphabetically on the x-axis, which
  # hides the trend across the ideological scale.
  mutate(
    Ideology = factor(
      Ideology,
      levels = c(
        "Very liberal", "Liberal", "Moderate",
        "Conservative", "Very conservative", "Not sure"
      )
    )
  ) %>%
  group_by(Ideology) %>%
  summarize(
    `Feelings toward police mean` = mean(`Feelings toward police`, na.rm = TRUE),
    `Feelings toward police sd` = sd(`Feelings toward police`, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = Ideology)) +
  geom_col(aes(y = `Feelings toward police mean`, fill = Ideology)) +
  # geom_linerange() draws the same vertical mean +/- SD line that was
  # previously assembled by hand with geom_segment().
  geom_linerange(
    aes(
      ymin = `Feelings toward police mean` - `Feelings toward police sd`,
      ymax = `Feelings toward police mean` + `Feelings toward police sd`
    )
  )

The average feeling score toward the police is positively correlated with the extent to which a respondent leans conservative.
On average, conservatives have warmer feelings toward the police than liberals.
The two liberal groups have a larger variance than the two conservative groups.
Compared to conservatives, liberals have more scattered opinions about the police.

Make a Histogram to visualize the differences (1)

# Histogram (1): distribution of the police feeling score, one panel per
# ideology group.
NewVoterData %>%
  # Drop rows with a missing score here, so ggplot2 does not have to remove
  # them itself and warn about non-finite values.
  filter(!is.na(Ideology), !is.na(`Feelings toward police`)) %>%
  # Order the panels from most liberal to most conservative ("Not sure" last)
  # instead of alphabetically.
  mutate(
    Ideology = factor(
      Ideology,
      levels = c(
        "Very liberal", "Liberal", "Moderate",
        "Conservative", "Very conservative", "Not sure"
      )
    )
  ) %>%
  # No group_by() here: ggplot2 ignores dplyr grouping, and facet_wrap()
  # already splits the data by Ideology.
  ggplot() +
  # bins = 30 is the value ggplot2 was falling back to; stating it keeps the
  # plot unchanged and silences the "Pick better value" message.
  geom_histogram(
    aes(x = `Feelings toward police`, fill = Ideology),
    bins = 30
  ) +
  facet_wrap(~Ideology)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 3101 rows containing non-finite values (stat_bin).

The histogram for conservatives is more left-skewed than that for liberals: most of its mass sits at the high end of the scale, with a tail toward low scores.
Conservatives’ views of the police look more concentrated around high ft scores, which indicate favorable feelings.

Investigate Groupwise Differences in the Average and SD of “ft” variable (2)

# Table (2): mean and standard deviation of the feeling score toward
# Evangelical Christians for each 2016 vote choice. Respondents without a vote
# label are left out, and missing scores are ignored inside each group.
NewVoterData %>%
  filter(!is.na(Vote2016)) %>%
  group_by(Vote2016) %>%
  summarize(
    `Feelings toward Evangelical Christians mean` = mean(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    ),
    `Feelings toward Evangelical Christians sd` = sd(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    )
  )

## # A tibble: 7 x 3
##   Vote2016         `Feelings toward Evangelica~ `Feelings toward Evangelic~
##   <chr>                                   <dbl>                       <dbl>
## 1 Did not vote fo~                         54.9                        34.4
## 2 Donald Trump                             73.6                        25.3
## 3 Evan McMullin                            58.5                        32.2
## 4 Gary Johnson                             49.5                        28.3
## 5 Hillary Clinton                          34.8                        31.0
## 6 Jill Stein                               25.2                        26.5
## 7 Other                                    57.3                        34.7

Make a Bar Chart for the table (2)

# Bar chart (2): mean feeling score toward Evangelical Christians per 2016
# vote choice, with a vertical line spanning mean - SD to mean + SD.
NewVoterData %>%
  filter(!is.na(Vote2016)) %>%
  group_by(Vote2016) %>%
  summarize(
    `Feelings toward Evangelical Christians mean` = mean(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    ),
    `Feelings toward Evangelical Christians sd` = sd(
      `Feelings toward Evangelical Christians`,
      na.rm = TRUE
    )
  ) %>%
  # The x mapping is shared by both layers, so it is declared once here.
  ggplot(aes(x = Vote2016)) +
  geom_col(
    aes(y = `Feelings toward Evangelical Christians mean`, fill = Vote2016)
  ) +
  # geom_linerange() draws the same vertical mean +/- SD line that was
  # previously assembled by hand with geom_segment().
  geom_linerange(
    aes(
      ymin = `Feelings toward Evangelical Christians mean` -
        `Feelings toward Evangelical Christians sd`,
      ymax = `Feelings toward Evangelical Christians mean` +
        `Feelings toward Evangelical Christians sd`
    )
  )

The average score of feelings towards Evangelical Christians differs between the groups.
Trump supporters have a warmer feeling towards Evangelical Christians than Hillary supporters on average.
Compared to Trump supporters, Hillary supporters have more scattered feelings towards Evangelical Christians.

Make a Histogram to visualize the differences (2)

# Histogram (2): distribution of the feeling score toward Evangelical
# Christians, one panel per 2016 vote choice.
NewVoterData %>%
  # Drop rows with a missing score here, so ggplot2 does not have to remove
  # them itself and warn about non-finite values.
  filter(
    !is.na(Vote2016),
    !is.na(`Feelings toward Evangelical Christians`)
  ) %>%
  # No group_by() here: ggplot2 ignores dplyr grouping, and facet_wrap()
  # already splits the data by Vote2016.
  ggplot() +
  # bins = 30 is the value ggplot2 was falling back to; stating it keeps the
  # plot unchanged and silences the "Pick better value" message.
  geom_histogram(
    aes(x = `Feelings toward Evangelical Christians`, fill = Vote2016),
    bins = 30
  ) +
  facet_wrap(~Vote2016)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 3165 rows containing non-finite values (stat_bin).

The histogram for Trump supporters is left-skewed (concentrated at high scores), and that for Hillary supporters is slightly right-skewed (concentrated at low scores).
The views of Trump supporters on Evangelical Christians look concentrated around high ft scores, indicating favorable feelings; those of Hillary supporters look more scattered, although some of them show warm feelings toward the group.

Investigate Groupwise Differences in the Average and SD of “ft” variable (3)

# Table (3): mean and standard deviation of the feeling score toward
# gays/lesbians for each abortion view. Respondents without an abortion label
# are left out, and missing scores are ignored inside each group.
NewVoterData %>%
  filter(!is.na(Abortion)) %>%
  group_by(Abortion) %>%
  summarize(
    `Feelings toward gays/lesbians mean` = mean(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    ),
    `Feelings toward gays/lesbians sd` = sd(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    )
  )

## # A tibble: 4 x 3
##   Abortion                `Feelings toward gays/le~ `Feelings toward gays/~
##   <chr>                                       <dbl>                   <dbl>
## 1 Illegal in all cases                         37.5                    31.5
## 2 Legal in all cases                           78.0                    23.7
## 3 Legal in some cases an~                      54.5                    29.9
## 4 Not sure                                     58.9                    28.6

Make a Bar Chart for the table (3)

# Bar chart (3): mean feeling score toward gays/lesbians per abortion view,
# with a vertical line spanning mean - SD to mean + SD on each bar.
NewVoterData %>%
  filter(!is.na(Abortion)) %>%
  # Order the groups from least to most restrictive ("Not sure" last). As
  # plain text they would be sorted alphabetically on the x-axis, which puts
  # "Illegal in all cases" first and hides the trend.
  mutate(
    Abortion = factor(
      Abortion,
      levels = c(
        "Legal in all cases",
        "Legal in some cases and illegal in others",
        "Illegal in all cases",
        "Not sure"
      )
    )
  ) %>%
  group_by(Abortion) %>%
  summarize(
    `Feelings toward gays/lesbians mean` = mean(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    ),
    `Feelings toward gays/lesbians sd` = sd(
      `Feelings toward gays/lesbians`,
      na.rm = TRUE
    )
  ) %>%
  ggplot(aes(x = Abortion)) +
  geom_col(aes(y = `Feelings toward gays/lesbians mean`, fill = Abortion)) +
  # geom_linerange() draws the same vertical mean +/- SD line that was
  # previously assembled by hand with geom_segment().
  geom_linerange(
    aes(
      ymin = `Feelings toward gays/lesbians mean` -
        `Feelings toward gays/lesbians sd`,
      ymax = `Feelings toward gays/lesbians mean` +
        `Feelings toward gays/lesbians sd`
    )
  )

The average feeling score toward gays/lesbians is negatively correlated with the extent to which a respondent favors abortion bans.
On average, pro-choice respondents (“legal in all cases”) have warmer feelings toward gays and lesbians than pro-life respondents (“illegal in all cases”).
Compared to pro-choice respondents, pro-life respondents have more scattered views on gays and lesbians.

Make a Histogram to visualize the differences (3)

# Histogram (3): distribution of the feeling score toward gays/lesbians, one
# panel per abortion view.
NewVoterData %>%
  # Drop rows with a missing score here, so ggplot2 does not have to remove
  # them itself and warn about non-finite values.
  filter(!is.na(Abortion), !is.na(`Feelings toward gays/lesbians`)) %>%
  # Order the panels from least to most restrictive ("Not sure" last) instead
  # of alphabetically.
  mutate(
    Abortion = factor(
      Abortion,
      levels = c(
        "Legal in all cases",
        "Legal in some cases and illegal in others",
        "Illegal in all cases",
        "Not sure"
      )
    )
  ) %>%
  # No group_by() here: ggplot2 ignores dplyr grouping, and facet_wrap()
  # already splits the data by Abortion.
  ggplot() +
  # bins = 30 is the value ggplot2 was falling back to; stating it keeps the
  # plot unchanged and silences the "Pick better value" message.
  geom_histogram(
    aes(x = `Feelings toward gays/lesbians`, fill = Abortion),
    bins = 30
  ) +
  facet_wrap(~Abortion)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 3177 rows containing non-finite values (stat_bin).

The histogram for the pro-choice group is left-skewed (concentrated at high scores), that for those with a moderate view on abortion is slightly left-skewed, and that for the pro-life group is slightly right-skewed (concentrated at low scores).
The pro-life group is smaller than the other two groups that hold a definite opinion on abortion.
The views of the pro-choice group look more concentrated around high ft scores, indicating favorable feelings, than those of the other two groups.