library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1. Introduction of Research Topic

I am interested in how males and females (gender_2019) differ in their views about the economy. I will compare them on three measures: their views on how the economy is changing (econtrend_2019), their views on whether government policies affect the economy (econ_2019), and their feelings towards labor unions (ft_unions_2016).

2. Description of Variables & Data Preparation

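My two categorical (DV) variables are views on the changing economy (econtrend_2019) and views on government policies and the economy (econ_2019). My continuous (DV) variable is feelings towards labor unions (ft_unions_2016), and my group (IV) variable is gender (gender_2019).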

library(ggplot2)
# Load the raw survey export.
# readr guessed weight_18_24_2018 as logical, so its numeric weights triggered
# parsing failures; declaring the column as double reads those weights as
# numbers. All other columns are still type-guessed by readr.
# NOTE(review): the path is relative to the working directory -- confirm it
# resolves wherever this report is knit.
Voter_Data_2019 <- read_csv(
  "Downloads/Voter Data 2019.csv",
  col_types = cols(weight_18_24_2018 = col_double())
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   weight_18_24_2018 = col_logical(),
##   izip_2019 = col_character(),
##   housevote_other_2019 = col_character(),
##   senatevote_other_2019 = col_character(),
##   senatevote2_other_2019 = col_character(),
##   SenCand1Name_2019 = col_character(),
##   SenCand1Party_2019 = col_character(),
##   SenCand2Name_2019 = col_character(),
##   SenCand2Party_2019 = col_character(),
##   SenCand3Name_2019 = col_character(),
##   SenCand3Party_2019 = col_character(),
##   SenCand1Name2_2019 = col_character(),
##   SenCand1Party2_2019 = col_character(),
##   SenCand2Name2_2019 = col_character(),
##   SenCand2Party2_2019 = col_character(),
##   SenCand3Name2_2019 = col_character(),
##   SenCand3Party2_2019 = col_character(),
##   governorvote_other_2019 = col_character(),
##   GovCand1Name_2019 = col_character(),
##   GovCand1Party_2019 = col_character()
##   # ... with 108 more columns
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: 800 parsing failures.
##  row               col           expected           actual                            file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 'Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 'Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 'Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 'Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 'Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ ...............................
## See problems(...) for more details.
# Keep only the four survey items used in this report, then preview the result.
Voter_Data_2019 <- select(
  Voter_Data_2019,
  gender_2019, econtrend_2019, econ_2019, ft_unions_2016
)
head(Voter_Data_2019)
## # A tibble: 6 x 4
##   gender_2019 econtrend_2019 econ_2019 ft_unions_2016
##         <dbl>          <dbl>     <dbl>          <dbl>
## 1           2              2         1             78
## 2          NA             NA        NA             48
## 3           1              3         1             99
## 4           1              3         1             99
## 5           1              1         1             30
## 6           2              3         1            100
# Recode the numeric survey codes into labelled analysis variables.
# Any code that is not listed below (including NA) becomes NA.
Voter_Data_2019 <- Voter_Data_2019 %>%
  mutate(
    EconomyChanging = case_when(
      econtrend_2019 == 1 ~ "Getting Better",
      econtrend_2019 == 2 ~ "About the Same",
      econtrend_2019 == 3 ~ "Getting Worse",
      TRUE ~ NA_character_
    ),
    # The long labels keep their leading space and embedded line breaks.
    GovtPolicies = case_when(
      econ_2019 == 1 ~
        " In general, government policies\n can have a large effect on the\n economy.",
      econ_2019 == 2 ~
        " In general, government policies\n do not matter much for the\n economy.",
      econ_2019 == 8 ~ "Skipped",
      TRUE ~ NA_character_
    ),
    Gender = case_when(
      gender_2019 == 1 ~ "Male",
      gender_2019 == 2 ~ "Female",
      TRUE ~ NA_character_
    ),
    # Ratings above 100 are treated as missing; everything else is kept as is.
    Unions = replace(ft_unions_2016, which(ft_unions_2016 > 100), NA)
  )

head(Voter_Data_2019)
## # A tibble: 6 x 8
##   gender_2019 econtrend_2019 econ_2019 ft_unions_2016 EconomyChanging
##         <dbl>          <dbl>     <dbl>          <dbl> <chr>          
## 1           2              2         1             78 About the Same 
## 2          NA             NA        NA             48 <NA>           
## 3           1              3         1             99 Getting Worse  
## 4           1              3         1             99 Getting Worse  
## 5           1              1         1             30 Getting Better 
## 6           2              3         1            100 Getting Worse  
## # … with 3 more variables: GovtPolicies <chr>, Gender <chr>, Unions <dbl>
# Preview only the recoded (labelled) columns.
Voter_Data_2019 %>%
  select(Gender, EconomyChanging, GovtPolicies, Unions) %>%
  head()
## # A tibble: 6 x 4
##   Gender EconomyChanging GovtPolicies                                     Unions
##   <chr>  <chr>           <chr>                                             <dbl>
## 1 Female About the Same  " In general, government policies\n can have a …     78
## 2 <NA>   <NA>             <NA>                                                48
## 3 Male   Getting Worse   " In general, government policies\n can have a …     99
## 4 Male   Getting Worse   " In general, government policies\n can have a …     99
## 5 Male   Getting Better  " In general, government policies\n can have a …     30
## 6 Female Getting Worse   " In general, government policies\n can have a …    100

3. Analysis: Group Variable (IV) x Categorical Variable #1 (DV)

# Analysis subset: respondents coded Male or Female, with their view on whether
# the economy is changing.
VD <- Voter_Data_2019 %>%
  filter(Gender %in% c("Male", "Female")) %>%
  select(Gender, EconomyChanging)
head(VD)
## # A tibble: 6 x 2
##   Gender EconomyChanging
##   <chr>  <chr>          
## 1 Female About the Same 
## 2 Male   Getting Worse  
## 3 Male   Getting Worse  
## 4 Male   Getting Better 
## 5 Female Getting Worse  
## 6 Female About the Same

Crosstab (showing row or column % appropriately)

# Row percentages: within each gender, the share giving each answer.
round(
  prop.table(table(VD$Gender, VD$EconomyChanging), margin = 1),
  digits = 2
)
##         
##          About the Same Getting Better Getting Worse
##   Female           0.44           0.31          0.25
##   Male             0.35           0.46          0.19

Visualization: Stacked barchart

# Stacked bar chart of the share of each gender answering "Getting Better" or
# "Getting Worse".
# Rows with a missing EconomyChanging are dropped before the shares are
# computed so the percentages agree with the crosstab (table() ignores NA by
# default). "About the Same" still counts in the denominator but is not drawn,
# so the bars do not reach 100%.
VD %>%
  filter(!is.na(EconomyChanging)) %>%
  group_by(Gender, EconomyChanging) %>%
  summarize(n = n()) %>%
  # Still grouped by Gender here, so sum(n) is the per-gender total.
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  filter(EconomyChanging %in% c("Getting Better", "Getting Worse")) %>%
  ggplot() +
  geom_col(aes(x = Gender, fill = EconomyChanging, y = percent)) +
  scale_fill_manual(values = c("lightslateblue", "moccasin")) +
  ggtitle("Gender x Economy Changing")
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.

Statistical Test: Chi-square test

# Chi-square test of independence between gender and view of the economy.
chisq.test(x = VD$Gender, y = VD$EconomyChanging)
## 
##  Pearson's Chi-squared test
## 
## data:  VD$Gender and VD$EconomyChanging
## X-squared = 160.59, df = 2, p-value < 2.2e-16

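Interpretation of the results of the above outputs overall: A larger share of males (46%) than females (31%) believe that the economy is getting better, while a larger share of females (25%) than males (19%) believe that it is getting worse. The chi-squared test (p-value < 0.05) indicates that this relationship between gender and views on the changing economy is statistically significant.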

4. Analysis: Group Variable (IV) x Categorical Variable #2 (DV)

# Analysis subset: respondents coded Male or Female, with their view on
# government policies and the economy.
VD <- Voter_Data_2019 %>%
  filter(Gender %in% c("Male", "Female")) %>%
  select(Gender, GovtPolicies)
head(VD)
## # A tibble: 6 x 2
##   Gender GovtPolicies                                                           
##   <chr>  <chr>                                                                  
## 1 Female " In general, government policies\n can have a large effect on the\n e…
## 2 Male   " In general, government policies\n can have a large effect on the\n e…
## 3 Male   " In general, government policies\n can have a large effect on the\n e…
## 4 Male   " In general, government policies\n can have a large effect on the\n e…
## 5 Female " In general, government policies\n can have a large effect on the\n e…
## 6 Female " In general, government policies\n can have a large effect on the\n e…

Crosstab (showing row or column % appropriately)

# Row percentages: within each gender, the share giving each answer.
round(
  prop.table(table(VD$Gender, VD$GovtPolicies), margin = 1),
  digits = 2
)
##         
##           In general, government policies\n can have a large effect on the\n economy.
##   Female                                                                         0.91
##   Male                                                                           0.92
##         
##           In general, government policies\n do not matter much for the\n economy.
##   Female                                                                     0.08
##   Male                                                                       0.07
##         
##          Skipped
##   Female    0.01
##   Male      0.00

Visualization: Stacked barchart

# Stacked bar chart: within each gender, the share giving each GovtPolicies
# answer. Rows missing either variable are excluded before counting.
ggplot(
  VD %>%
    filter(!(is.na(GovtPolicies) | is.na(Gender))) %>%
    group_by(Gender, GovtPolicies) %>%
    summarise(n = n()) %>%
    # Still grouped by Gender here, so sum(n) is the per-gender total.
    mutate(percent = n / sum(n))
) +
  geom_col(aes(x = Gender, y = percent, fill = GovtPolicies))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.

Statistical Test: Chi-square test

# Chi-square test of independence between gender and view of government
# policies.
chisq.test(x = VD$Gender, y = VD$GovtPolicies)
## 
##  Pearson's Chi-squared test
## 
## data:  VD$Gender and VD$GovtPolicies
## X-squared = 18.616, df = 2, p-value = 9.069e-05

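Interpretation of the results of the above outputs overall: Large majorities of both males (92%) and females (91%) agree that government policies can have a large effect on the economy. Although the chi-squared test is statistically significant (p-value < 0.05), the difference between the genders is very small.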

5. Analysis: Group Variable (IV) x Continuous Variable #1 (DV)

# Analysis subset: respondents coded Male or Female, with their cleaned labor
# union rating.
VD <- Voter_Data_2019 %>%
  filter(Gender %in% c("Male", "Female")) %>%
  select(Gender, Unions)
head(VD)
## # A tibble: 6 x 2
##   Gender Unions
##   <chr>   <dbl>
## 1 Female     78
## 2 Male       99
## 3 Male       99
## 4 Male       30
## 5 Female    100
## 6 Female     91

Table Comparing Means

# Mean union rating per gender.
# Unions contains NA, and mean() returns NA for a group with any missing value
# unless na.rm = TRUE is given.
VD %>%
  group_by(Gender) %>%
  summarise(mean = mean(Unions, na.rm = TRUE))
## # A tibble: 2 x 2
##   Gender  mean
## * <chr>  <dbl>
## 1 Female    NA
## 2 Male      NA

Visualization: Bar chart comparing means

# Bar chart of the mean union rating per gender.
# Uses the cleaned subset VD (Male/Female only) and the recoded Unions column
# (ratings above 100 set to NA), so the bars match the table of means and do
# not include an NA gender bar or out-of-range codes.
VD %>%
  group_by(Gender) %>%
  summarize(mean_unions = mean(Unions, na.rm = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = Gender, y = mean_unions, fill = Gender))

Visualization: Histogram comparing population distributions

# Distribution of union ratings, one panel per gender.
ggplot(VD) +
  geom_histogram(aes(x = Unions, fill = Gender), color = "black") +
  facet_wrap(~Gender) +
  scale_fill_manual(values = c("palevioletred1", "paleturquoise1"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1162 rows containing non-finite values (stat_bin).

Visualization: Histogram comparing sampling distributions

# Approximate each gender's sampling distribution of the mean union rating by
# drawing 10,000 random samples of 40 ratings and averaging each sample.
mdata <- VD %>% filter(Gender == "Male")
fdata <- VD %>% filter(Gender == "Female")

# Missing ratings are removed before sampling so that every sample mean is
# based on exactly 40 observed values. Sampling first and dropping NA afterwards
# would leave samples of varying, smaller size.
mmdata <- data.frame(
  mean = replicate(10000, mean(sample(na.omit(mdata$Unions), 40)))
)
ffdata <- data.frame(
  mean = replicate(10000, mean(sample(na.omit(fdata$Unions), 40)))
)

# Overlay the two sampling distributions; the female histogram is
# semi-transparent so the male histogram underneath stays visible.
ggplot() +
  geom_histogram(
    data = mmdata, aes(x = mean),
    fill = "salmon", color = "slategray"
  ) +
  geom_histogram(
    data = ffdata, aes(x = mean),
    fill = "seagreen1", color = "slategray", alpha = 0.5
  ) +
  ggtitle("Sampling distributions")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

T-Test

Welch Two Sample t-test

data: ft_unions_2016 by gender_2019

t = -3.9084, df = 5894.3, p-value = 9.393e-05

alternative hypothesis: true difference in means is not equal to 0

95 percent confidence interval: -33.27489 -11.04520

sample estimates: mean in group 1: 92.82774, mean in group 2: 114.98778

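Interpretation of the results of the above outputs overall: There is a statistically significant difference between males' and females' mean feelings towards labor unions because the p-value (9.393e-05) is less than 0.05. On average, females (group 2) rate labor unions higher than males (group 1). Note, however, that this test was run on the raw ft_unions_2016 and gender_2019 columns: a group mean above 100 shows that the out-of-range codes recoded to NA in Unions are still included, so the test should be rerun on the cleaned Unions and Gender variables.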

6. Conclusions

  • For the first categorical analysis, we can see that gender does affect how people view the changing economy. Based on the charts, more males think that the economy is getting better, while more females believe that the economy is getting worse. Furthermore, the chi-squared test gives a p-value lower than 0.05, which means there is a statistically significant relationship between gender and views on the changing economy.

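  • For the second categorical analysis, the chi-squared test indicates a statistically significant relationship between gender and views on government policies (p-value lower than 0.05), but the difference is small: based on the charts, large majorities of both males and females agree that government policies can have a large effect on the economy.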

  • Lastly, for the continuous variable analysis, we can see that gender does affect people’s feelings towards labor unions. Based on the charts, females report warmer feelings towards labor unions than males.

  • Overall, gender does affect how people view the changing economy, how they think government policies affect the economy, and how they feel about labor unions.