Real Research Question: How do Hillary Clinton Voters and Marco Rubio Voters in the 2016 Clinton-versus-Rubio matchup differ in whether they favor or oppose the death penalty, whether they believe the death penalty is used too often or not often enough, and how they felt about police officers in 2017?

Data Processing and Management: Importing, Recoding, Filtering, Selecting, Renaming, Creating New Variables

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gapminder)
library(tidyr)
library(ggplot2)
library(knitr)
library(readr)

# Read the survey file, keep the four analysis variables (giving three of them
# descriptive names), and sort the rows by those variables.
Voter_Data_2019 <- read_csv("~/Downloads/Voter Data 2019.csv") %>%
  select(
    Political_Voters = Clinton_Rubio_2016,
    Favor_or_Oppose = deathpenalty_baseline,
    Often_or_Not_Often = deathpenfreq_baseline,
    ft_police_2017
  ) %>%
  arrange(Political_Voters, Favor_or_Oppose, Often_or_Not_Often, ft_police_2017)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )
## See spec(...) for full column specifications.
## Warning: 800 parsing failures.
##  row               col           expected           actual                              file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '~/Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '~/Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '~/Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '~/Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '~/Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ .................................
## See problems(...) for more details.
# Preview the first six rows of the selected and renamed columns.
head(Voter_Data_2019, n = 6)
## # A tibble: 6 x 4
##   Political_Voters Favor_or_Oppose Often_or_Not_Often ft_police_2017
##              <dbl>           <dbl>              <dbl>          <dbl>
## 1                1               1                  1              0
## 2                1               1                  1              7
## 3                1               1                  1             10
## 4                1               1                  1             14
## 5                1               1                  1             19
## 6                1               1                  1             19
# Label the matchup codes (1 = Clinton, 2 = Rubio; any other code becomes NA)
# and set police thermometer values above 100 to NA, since the rating scale
# used in this report runs from 0 to 100.
Voter_Data_2019 <- Voter_Data_2019 %>%
  mutate(
    Political_Voters = case_when(
      Political_Voters == 1 ~ "Hillary Clinton Voters",
      Political_Voters == 2 ~ "Marco Rubio Voters",
      TRUE ~ NA_character_
    ),
    ft_police_2017 = if_else(ft_police_2017 > 100, NA_real_, ft_police_2017)
  )

# Preview the first six rows to confirm the voter labels were applied.
head(Voter_Data_2019, n = 6)
## # A tibble: 6 x 4
##   Political_Voters       Favor_or_Oppose Often_or_Not_Often ft_police_2017
##   <chr>                            <dbl>              <dbl>          <dbl>
## 1 Hillary Clinton Voters               1                  1              0
## 2 Hillary Clinton Voters               1                  1              7
## 3 Hillary Clinton Voters               1                  1             10
## 4 Hillary Clinton Voters               1                  1             14
## 5 Hillary Clinton Voters               1                  1             19
## 6 Hillary Clinton Voters               1                  1             19
# Turn the numeric death-penalty stance codes into labels
# (1 = favor, 2 = oppose, 8 = not sure; any other code becomes NA).
Voter_Data_2019 <- Voter_Data_2019 %>%
  mutate(
    Favor_or_Oppose = case_when(
      Favor_or_Oppose == 1 ~ "Favor the death penalty",
      Favor_or_Oppose == 2 ~ "Oppose the death penalty",
      Favor_or_Oppose == 8 ~ "Not Sure",
      TRUE ~ NA_character_
    )
  )

# Preview the first six rows to confirm the favor/oppose labels were applied.
head(Voter_Data_2019, n = 6)
## # A tibble: 6 x 4
##   Political_Voters       Favor_or_Oppose        Often_or_Not_Oft… ft_police_2017
##   <chr>                  <chr>                              <dbl>          <dbl>
## 1 Hillary Clinton Voters Favor the death penal…                 1              0
## 2 Hillary Clinton Voters Favor the death penal…                 1              7
## 3 Hillary Clinton Voters Favor the death penal…                 1             10
## 4 Hillary Clinton Voters Favor the death penal…                 1             14
## 5 Hillary Clinton Voters Favor the death penal…                 1             19
## 6 Hillary Clinton Voters Favor the death penal…                 1             19
# Turn the numeric death-penalty frequency codes into labels
# (1 = too often, 2 = about right, 3 = not often enough, 4 = not sure;
# any other code becomes NA).
Voter_Data_2019 <- Voter_Data_2019 %>%
  mutate(
    Often_or_Not_Often = case_when(
      Often_or_Not_Often == 1 ~ "Too Often",
      Often_or_Not_Often == 2 ~ "About right",
      Often_or_Not_Often == 3 ~ "Not Often enough",
      Often_or_Not_Often == 4 ~ "Not Sure",
      TRUE ~ NA_character_
    )
  )

# Preview the first six rows to confirm the frequency labels were applied.
head(Voter_Data_2019, n = 6)
## # A tibble: 6 x 4
##   Political_Voters       Favor_or_Oppose        Often_or_Not_Oft… ft_police_2017
##   <chr>                  <chr>                  <chr>                      <dbl>
## 1 Hillary Clinton Voters Favor the death penal… Too Often                      0
## 2 Hillary Clinton Voters Favor the death penal… Too Often                      7
## 3 Hillary Clinton Voters Favor the death penal… Too Often                     10
## 4 Hillary Clinton Voters Favor the death penal… Too Often                     14
## 5 Hillary Clinton Voters Favor the death penal… Too Often                     19
## 6 Hillary Clinton Voters Favor the death penal… Too Often                     19

Statistical Data Analysis:Crosstabs, Chi-squared tests, Mean/Average Comparisons, and T-tests

Comparing people who voted for Hillary Clinton and people who voted for Marco Rubio on their stance on Favor or Oppose of Death Penalty.

# Crosstab: number of respondents in each voter group by death-penalty stance,
# plus each stance's share within its voter group (NA responses included).
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  count(Political_Voters, Favor_or_Oppose) %>%
  group_by(Political_Voters) %>%
  mutate(percent = n / sum(n))
## # A tibble: 8 x 4
## # Groups:   Political_Voters [2]
##   Political_Voters       Favor_or_Oppose              n percent
##   <chr>                  <chr>                    <int>   <dbl>
## 1 Hillary Clinton Voters Favor the death penalty   1396 0.391  
## 2 Hillary Clinton Voters Not Sure                   698 0.196  
## 3 Hillary Clinton Voters Oppose the death penalty  1444 0.405  
## 4 Hillary Clinton Voters <NA>                        30 0.00841
## 5 Marco Rubio Voters     Favor the death penalty   2944 0.778  
## 6 Marco Rubio Voters     Not Sure                   364 0.0962 
## 7 Marco Rubio Voters     Oppose the death penalty   447 0.118  
## 8 Marco Rubio Voters     <NA>                        28 0.00740

Comparing people who voted for Hillary Clinton and people who voted for Marco Rubio on their stance on how often the death penalty happens.

# Crosstab: number of respondents in each voter group by their view of how
# often the death penalty happens, plus each answer's share within its voter
# group (NA responses included).
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  count(Political_Voters, Often_or_Not_Often) %>%
  group_by(Political_Voters) %>%
  mutate(percent = n / sum(n))
## # A tibble: 10 x 4
## # Groups:   Political_Voters [2]
##    Political_Voters       Often_or_Not_Often     n percent
##    <chr>                  <chr>              <int>   <dbl>
##  1 Hillary Clinton Voters About right          432 0.121  
##  2 Hillary Clinton Voters Not Often enough     897 0.251  
##  3 Hillary Clinton Voters Not Sure             724 0.203  
##  4 Hillary Clinton Voters Too Often           1481 0.415  
##  5 Hillary Clinton Voters <NA>                  34 0.00953
##  6 Marco Rubio Voters     About right          623 0.165  
##  7 Marco Rubio Voters     Not Often enough    2262 0.598  
##  8 Marco Rubio Voters     Not Sure             528 0.140  
##  9 Marco Rubio Voters     Too Often            344 0.0909 
## 10 Marco Rubio Voters     <NA>                  26 0.00687

AVERAGE FEELINGS about Police Officers for Hillary Clinton Voters and Marco Rubio Voters (0-100)

# Average police thermometer rating for each voter group, ignoring missing
# ratings.
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  group_by(Political_Voters) %>%
  summarise(Average = mean(ft_police_2017, na.rm = TRUE))
## # A tibble: 2 x 2
##   Political_Voters       Average
##   <chr>                    <dbl>
## 1 Hillary Clinton Voters    66.4
## 2 Marco Rubio Voters        86.0

When asked to rate Police Officers in 2017 on a scale from 0 to 100, Hillary Clinton Voters gave an average rating of 66.4, and Marco Rubio Voters gave an average score of 86.0. This indicates that Marco Rubio Voters feel more favorably towards Police Officers than Hillary Clinton Voters do.

T-test of how the voters feel about police officers

# Welch two-sample t-test comparing the mean police rating of the two voter
# groups.
t.test(
  ft_police_2017 ~ Political_Voters,
  data = Voter_Data_2019
)
## 
##  Welch Two Sample t-test
## 
## data:  ft_police_2017 by Political_Voters
## t = -31.747, df = 4583.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.79439 -18.37553
## sample estimates:
## mean in group Hillary Clinton Voters     mean in group Marco Rubio Voters 
##                             66.39719                             85.98215

I compared Hillary Clinton Voters and Marco Rubio Voters on the ft_police_2017 variable (feelings towards police officers). I found that Marco Rubio Voters feel more favorably towards police officers than Hillary Clinton Voters do: the mean for Marco Rubio Voters is about 85.98 and the mean for Hillary Clinton Voters is about 66.40. The p-value is less than 2.2e-16, far below 0.05, which shows that the difference between the two groups is statistically significant.

% of Respondents on Political Voters

# Share of respondents in each voter group (NA excluded), rounded to two
# decimals.
round(prop.table(table(Voter_Data_2019$Political_Voters)), 2)
## 
## Hillary Clinton Voters     Marco Rubio Voters 
##                   0.49                   0.51

% of Respondents on support of Death Penalty

# Share of respondents giving each death-penalty stance (NA excluded), rounded
# to two decimals.
round(prop.table(table(Voter_Data_2019$Favor_or_Oppose)), 2)
## 
##  Favor the death penalty                 Not Sure Oppose the death penalty 
##                     0.60                     0.15                     0.25

Chi-squared test of favor of Death Penalty

# Chi-squared test of independence between voter group and death-penalty
# stance.
chisq.test(
  x = Voter_Data_2019$Political_Voters,
  y = Voter_Data_2019$Favor_or_Oppose
)
## 
##  Pearson's Chi-squared test
## 
## data:  Voter_Data_2019$Political_Voters and Voter_Data_2019$Favor_or_Oppose
## X-squared = 1177.4, df = 2, p-value < 2.2e-16

Null Hypothesis (expected counts if voter group and stance were unrelated) of favor of Death Penalty

# Expected cell counts under the null hypothesis of no association between
# voter group and death-penalty stance. The component is selected by name
# rather than by position so the code does not depend on the order of the
# elements in the test result.
chisq.test(
  Voter_Data_2019$Political_Voters,
  Voter_Data_2019$Favor_or_Oppose
)["expected"]
## $expected
##                         
##                          Favor the death penalty Not Sure
##   Hillary Clinton Voters                2105.433 515.2003
##   Marco Rubio Voters                    2234.567 546.7997
##                         
##                          Oppose the death penalty
##   Hillary Clinton Voters                 917.3671
##   Marco Rubio Voters                     973.6329

Actual (observed) counts of favor of Death Penalty

# Observed cell counts of voter group by death-penalty stance. The component
# is selected by name rather than by position so the code does not depend on
# the order of the elements in the test result.
chisq.test(
  Voter_Data_2019$Political_Voters,
  Voter_Data_2019$Favor_or_Oppose
)["observed"]
## $observed
##                         
##                          Favor the death penalty Not Sure
##   Hillary Clinton Voters                    1396      698
##   Marco Rubio Voters                        2944      364
##                         
##                          Oppose the death penalty
##   Hillary Clinton Voters                     1444
##   Marco Rubio Voters                          447

When you compare the expected counts under the Null Hypothesis with the actual (observed) counts on whether they favor the death penalty or not, you can see that there is a major difference in values.

For the Null Hypothesis (expected counts), for Hillary Clinton Voters, “Favor the death penalty” is about 2105.433, “Not Sure” is about 515.2003, and “Oppose the death penalty” is about 917.3671. For Marco Rubio Voters, “Favor the death penalty” is about 2234.567, “Not Sure” is about 546.7997, and “Oppose the death penalty” is about 973.6329.

For the actual (observed) counts, for Hillary Clinton Voters, “Favor the death penalty” is 1396, “Not Sure” is 698, and “Oppose the death penalty” is 1444. For Marco Rubio Voters, “Favor the death penalty” is 2944, “Not Sure” is 364, and “Oppose the death penalty” is 447.

% of Respondents on how frequently Death Penalty happens

# Share of respondents giving each answer on how often the death penalty
# happens (NA excluded), rounded to two decimals.
round(prop.table(table(Voter_Data_2019$Often_or_Not_Often)), 2)
## 
##      About right Not Often enough         Not Sure        Too Often 
##             0.14             0.43             0.18             0.24

Chi-Squared test of how frequently Death Penalty happens

# Chi-squared test of independence between voter group and the view of how
# often the death penalty happens.
chisq.test(
  x = Voter_Data_2019$Political_Voters,
  y = Voter_Data_2019$Often_or_Not_Often
)
## 
##  Pearson's Chi-squared test
## 
## data:  Voter_Data_2019$Political_Voters and Voter_Data_2019$Often_or_Not_Often
## X-squared = 1357.9, df = 3, p-value < 2.2e-16

Null Hypothesis (expected counts if voter group and answer were unrelated) of how frequently Death Penalty happens

# Expected cell counts under the null hypothesis of no association between
# voter group and the view of how often the death penalty happens. The
# component is selected by name rather than by position so the code does not
# depend on the order of the elements in the test result.
chisq.test(
  Voter_Data_2019$Political_Voters,
  Voter_Data_2019$Often_or_Not_Often
)["expected"]
## $expected
##                         
##                          About right Not Often enough Not Sure Too Often
##   Hillary Clinton Voters    511.3661          1531.19 606.8534  884.5906
##   Marco Rubio Voters        543.6339          1627.81 645.1466  940.4094

Actual (observed) counts of how frequently Death Penalty happens

# Observed cell counts of voter group by the view of how often the death
# penalty happens. The component is selected by name rather than by position
# so the code does not depend on the order of the elements in the test result.
chisq.test(
  Voter_Data_2019$Political_Voters,
  Voter_Data_2019$Often_or_Not_Often
)["observed"]
## $observed
##                         
##                          About right Not Often enough Not Sure Too Often
##   Hillary Clinton Voters         432              897      724      1481
##   Marco Rubio Voters             623             2262      528       344

When you compare the expected counts under the Null Hypothesis with the actual (observed) counts on how frequently they think the death penalty happens, you can see that there is a major difference in values.

For the Null Hypothesis (expected counts), for Hillary Clinton Voters, in “About right”, it is about 511.3661, in “Not Often enough”, it is about 1531.19, for “Not Sure”, it is about 606.8534, and then for “Too Often”, it is about 884.5906. For Marco Rubio Voters, in “About right”, it is about 543.6339, in “Not Often enough”, it is about 1627.81, for “Not Sure”, it is about 645.1466, and then for “Too Often”, it is about 940.4094.

For the actual (observed) counts, for Hillary Clinton Voters, “About right” is 432, “Not Often enough” is 897, “Not Sure” is 724, and “Too Often” is 1481. For Marco Rubio Voters, “About right” is 623, “Not Often enough” is 2262, “Not Sure” is 528, and “Too Often” is 344.

Data Visualisation: Bar Charts, Stack Bar charts, Histograms, Sampling Distributions,

Interpretive Ability: Interpreting the tables and charts generated via the above methods.

Bar Column of Political Voters if they favor Death Penalty or not.

# Stacked bar chart: within each voter group, the share of respondents giving
# each death-penalty stance (NA responses included as their own segment).
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  count(Political_Voters, Favor_or_Oppose) %>%
  group_by(Political_Voters) %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  geom_col(
    aes(x = Political_Voters, y = percent, fill = Favor_or_Oppose)
  )

### Table of Political Voters if they favor Death Penalty or not.

# Column proportions: share of each death-penalty stance within each voter
# group (NA excluded), rounded to two decimals.
table(
  Voter_Data_2019$Favor_or_Oppose,
  Voter_Data_2019$Political_Voters
) %>%
  prop.table(margin = 2) %>%
  round(digits = 2)
##                           
##                            Hillary Clinton Voters Marco Rubio Voters
##   Favor the death penalty                    0.39               0.78
##   Not Sure                                   0.20               0.10
##   Oppose the death penalty                   0.41               0.12

In this bar chart and the table, there are 4 different colors showing the responses of the two groups, Hillary Clinton Voters and Marco Rubio Voters, on whether they favor the death penalty, oppose it, or are not sure. For Hillary Clinton Voters, the red bar shows that 39% “favor” it, the blue bar shows that 41% “oppose” it, and the green bar shows that the remaining 20% answered “Not Sure”. For Marco Rubio Voters, the red bar shows that 78% “favor” it, the blue bar shows that 12% “oppose” it, and the green bar shows that the remaining 10% answered “Not Sure”. There are also NA values, but there are so few of them that they are barely visible in the chart.

Bar graph of Political Voters if they believe Death Penalties happen frequently or not.

# Stacked bar chart: within each voter group, the share of respondents giving
# each answer on how often the death penalty happens (NA responses included as
# their own segment).
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  count(Political_Voters, Often_or_Not_Often) %>%
  group_by(Political_Voters) %>%
  mutate(percent = n / sum(n)) %>%
  ggplot() +
  geom_col(
    aes(x = Political_Voters, y = percent, fill = Often_or_Not_Often)
  )

### Table of Political Voters if they believe Death Penalties happen frequently or not.

# Column proportions: share of each answer on how often the death penalty
# happens within each voter group (NA excluded), rounded to two decimals.
table(
  Voter_Data_2019$Often_or_Not_Often,
  Voter_Data_2019$Political_Voters
) %>%
  prop.table(margin = 2) %>%
  round(digits = 2)
##                   
##                    Hillary Clinton Voters Marco Rubio Voters
##   About right                        0.12               0.17
##   Not Often enough                   0.25               0.60
##   Not Sure                           0.20               0.14
##   Too Often                          0.42               0.09

In this bar chart and the table, there are 4 different colors showing the responses of the two groups, Hillary Clinton Voters and Marco Rubio Voters, on how often they think the death penalty happens. For the red bar, which represents “About right”, it shows about 12% for Hillary Clinton Voters and about 17% for Marco Rubio Voters. For the green bar, which represents “Not Often enough”, about 25% of Hillary Clinton Voters picked that as their response, and about 60% of Marco Rubio Voters picked that as their response. For the blue bar, which represents “Not Sure”, about 20% of Hillary Clinton Voters picked that as their response, and about 14% of Marco Rubio Voters picked that as their response. For the purple bar, which represents “Too Often”, about 42% of Hillary Clinton Voters picked that as their response, and about 9% of Marco Rubio Voters picked that as their response.

Histogram comparing how the two groups of voters feel about the police.

# Histogram of police thermometer ratings, one panel per voter group.
Voter_Data_2019 %>%
  filter(
    Political_Voters %in% c("Hillary Clinton Voters", "Marco Rubio Voters")
  ) %>%
  ggplot(aes(x = ft_police_2017)) +
  geom_histogram() +
  facet_wrap(vars(Political_Voters))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1987 rows containing non-finite values (stat_bin).

In this histogram, you can see that the bars at the high end of the scale are smaller for Hillary Clinton Voters than for Marco Rubio Voters. At the upper end of the histogram there is a dramatic difference in how the two groups felt about the police in 2017: Marco Rubio Voters rated the police far more favorably than Hillary Clinton Voters did.

Sampling Distribution of Hillary Clinton Voters

# Sampling distribution of the mean police rating for Hillary Clinton voters:
# draw 10,000 samples of 40 ratings each and plot the sample means.
voter <- Voter_Data_2019 %>%
  filter(Political_Voters == "Hillary Clinton Voters")
# Remove missing ratings BEFORE sampling so every sample really contains 40
# observed values. Sampling first and dropping NA inside mean() would make the
# effective sample size vary from draw to draw.
clinton_police_ratings <- voter$ft_police_2017[!is.na(voter$ft_police_2017)]
replicate(10000, mean(sample(clinton_police_ratings, 40))) %>%
  data.frame(Mean = .) %>%
  ggplot() +
  geom_histogram(aes(x = Mean), fill = "lightblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Sampling Distribution of Marco Rubio Voters

# Sampling distribution of the mean police rating for Marco Rubio voters:
# draw 10,000 samples of 40 ratings each and plot the sample means.
Voter <- Voter_Data_2019 %>%
  filter(Political_Voters == "Marco Rubio Voters")
# Remove missing ratings BEFORE sampling so every sample really contains 40
# observed values. Sampling first and dropping NA inside mean() would make the
# effective sample size vary from draw to draw.
rubio_police_ratings <- Voter$ft_police_2017[!is.na(Voter$ft_police_2017)]
replicate(10000, mean(sample(rubio_police_ratings, 40))) %>%
  data.frame(Mean = .) %>%
  ggplot() +
  geom_histogram(aes(x = Mean), fill = "darkred")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

When you compare the sampling distributions of Hillary Clinton Voters and Marco Rubio Voters, you can see that the sample means for Hillary Clinton Voters range from about 47 to 92, which is lower than the range for Marco Rubio Voters, about 70 to 97. When you compare the counts in the two sampling distributions, the highest count for Hillary Clinton Voters is similar to the highest count for Marco Rubio Voters.