library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning in file(con, "r"): cannot open file '/var/db/timezone/zoneinfo/
## +VERSION': No such file or directory
library(knitr)
Voter2019 <- read_csv("/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## weight_18_24_2018 = col_logical(),
## izip_2019 = col_character(),
## housevote_other_2019 = col_character(),
## senatevote_other_2019 = col_character(),
## senatevote2_other_2019 = col_character(),
## SenCand1Name_2019 = col_character(),
## SenCand1Party_2019 = col_character(),
## SenCand2Name_2019 = col_character(),
## SenCand2Party_2019 = col_character(),
## SenCand3Name_2019 = col_character(),
## SenCand3Party_2019 = col_character(),
## SenCand1Name2_2019 = col_character(),
## SenCand1Party2_2019 = col_character(),
## SenCand2Name2_2019 = col_character(),
## SenCand2Party2_2019 = col_character(),
## SenCand3Name2_2019 = col_character(),
## SenCand3Party2_2019 = col_character(),
## governorvote_other_2019 = col_character(),
## GovCand1Name_2019 = col_character(),
## GovCand1Party_2019 = col_character()
## # ... with 108 more columns
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: 800 parsing failures.
## row col expected actual file
## 2033 weight_18_24_2018 1/0/T/F/TRUE/FALSE .917710168467982 '/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv'
## 2828 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.41022291345592 '/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv'
## 4511 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.77501243840922 '/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv'
## 7264 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.29486870319614 '/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv'
## 7277 weight_18_24_2018 1/0/T/F/TRUE/FALSE 1.44972719707603 '/Users/chelsyrodriguez/Downloads/Voter Data 2019.csv'
## .... ................. .................. ................ ......................................................
## See problems(...) for more details.
VoterData2019<-Voter2019 %>%
mutate(Employmentstatus = ifelse(employ_2019==1,"Full-time",
ifelse(employ_2019==2,"Part-time","NA")),
SatisfiedlifeJob = ifelse(satisf_Job_2018==1,"VS",
ifelse(satisf_Job_2018==2,"SS",
ifelse(satisf_Job_2018==3,"NS nor DS",
ifelse(satisf_Job_2018==4,"SD","NA")))),
SatisfiedlifeIncome = ifelse(satisf_Income_2018==1,"VS",
ifelse(satisf_Income_2018==2,"SS",
ifelse(satisf_Income_2018==3,"NS nor DS",
ifelse(satisf_Income_2018==4,"SD",
ifelse(satisf_Income_2018==5,"VD","NA"))))),
DifferenceIncome = ifelse(diff_inc_2019==1,"Strongly agree",
ifelse(diff_inc_2019==2,"Somewhat agree",
ifelse(diff_inc_2019==3,"Neither agree or disagree",
ifelse(diff_inc_2019==4,"Somewhat disagree",
ifelse(diff_inc_2019==5,"Strongly disagree",
ifelse(diff_inc_2019==8,"skipped","NA")))))),
FeelingAboutWelfareRecipients = ifelse(wr_2019>100,NA,wr_2019))%>%
select(Employmentstatus,SatisfiedlifeJob,SatisfiedlifeIncome,DifferenceIncome,FeelingAboutWelfareRecipients)%>%
filter(Employmentstatus %in% c("Full-time","Part-time"))
head(VoterData2019)
## # A tibble: 6 x 5
## Employmentstatus SatisfiedlifeJob SatisfiedlifeIn… DifferenceIncome
## <chr> <chr> <chr> <chr>
## 1 Full-time <NA> <NA> Strongly agree
## 2 Full-time VS NS nor DS Strongly disagr…
## 3 Full-time <NA> <NA> Somewhat agree
## 4 Part-time NS nor DS VD Strongly agree
## 5 Full-time VS VS Somewhat agree
## 6 Part-time NS nor DS SD Strongly agree
## # … with 1 more variable: FeelingAboutWelfareRecipients <dbl>
table(VoterData2019$SatisfiedlifeJob,VoterData2019$Employmentstatus) %>%
prop.table(2)
##
## Full-time Part-time
## NA 0.07261641 0.08897485
## NS nor DS 0.15410200 0.23597679
## SD 0.10421286 0.08317215
## SS 0.39356984 0.36363636
## VS 0.27549889 0.22823985
round(2)
## [1] 2
VoterData2019 %>%
group_by(SatisfiedlifeJob,Employmentstatus) %>%
summarize(n=n()) %>%
mutate(percent=n/sum(n)) %>%
ggplot()+
geom_col(aes(x=SatisfiedlifeJob,y=percent,fill=Employmentstatus))
## `summarise()` has grouped output by 'SatisfiedlifeJob'. You can override using the `.groups` argument.
The stacked chart for variable 1 shows us that full-time employees have a higher percentage than part-time employees on how satisfied or dissatisfied they are with their job. There are more full-time employees that are somewhat dissatisfied (SD) with their job than part-time employees. As for part-time employees, it shows they’re neither satisfied (NS) or dissatisfied (DS) with their job.
chisq.test(VoterData2019$SatisfiedlifeJob,VoterData2019$Employmentstatus)
##
## Pearson's Chi-squared test
##
## data: VoterData2019$SatisfiedlifeJob and VoterData2019$Employmentstatus
## X-squared = 23.145, df = 4, p-value = 0.0001184
The p-value indicates the scientific notation as 0.0001184 which is a smaller value then 0.05. Therefore there is a statistically significant relationship on how full-time and part-time employees differ their attitudes in how satisfied they are with their job.
table(VoterData2019$SatisfiedlifeIncome,VoterData2019$Employmentstatus) %>%
prop.table(2)
##
## Full-time Part-time
## NA 0.01385809 0.01547389
## NS nor DS 0.16186253 0.19148936
## SD 0.17960089 0.23210832
## SS 0.41851441 0.31528046
## VD 0.08647450 0.13926499
## VS 0.13968958 0.10638298
round(2)
## [1] 2
VoterData2019 %>%
group_by(SatisfiedlifeIncome,Employmentstatus) %>%
summarize(n=n()) %>%
mutate(percent=n/sum(n)) %>%
ggplot()+
geom_col(aes(x=SatisfiedlifeIncome,y=percent,fill=Employmentstatus))
## `summarise()` has grouped output by 'SatisfiedlifeIncome'. You can override using the `.groups` argument.
The stacked chart for variable 2 shows the same higher percentage in full-time employees then part-time employees on how satisfied or dissatisfied they are with their income. Full-time employees are very satisfied (VS) with their income. As for part-time employees, they are very dissatisfied (VD) with their income.
chisq.test(VoterData2019$SatisfiedlifeIncome,VoterData2019$Employmentstatus)
##
## Pearson's Chi-squared test
##
## data: VoterData2019$SatisfiedlifeIncome and VoterData2019$Employmentstatus
## X-squared = 33.557, df = 5, p-value = 2.917e-06
The p-value shows the scientific notation as 2.917e-06 which is a larger value than .05.It shows a strong evidence for the null hypothesis. There isn’t a statistically significant relationship on how full-time and part-time employees differ their attitudes on how satisfied they are with their income.
VoterData2019%>%
group_by(Employmentstatus)%>%
summarize(Avg_FeelingAboutWelfareRecipients = mean(FeelingAboutWelfareRecipients, na.rm=TRUE))
## # A tibble: 2 x 2
## Employmentstatus Avg_FeelingAboutWelfareRecipients
## * <chr> <dbl>
## 1 Full-time 48.8
## 2 Part-time 51.1
VoterData2019 %>%
group_by(FeelingAboutWelfareRecipients,Employmentstatus) %>%
summarize(n=n()) %>%
mutate(percent=n/sum(n)) %>%
ggplot()+
geom_col(aes(x=Employmentstatus,y=percent,fill=Employmentstatus))
## `summarise()` has grouped output by 'FeelingAboutWelfareRecipients'. You can override using the `.groups` argument.
Full-time employees has a higher percentage on feelings towards welfare recipients compared to part-time employees.
VoterData2019 %>%
filter(Employmentstatus %in% c("Full-time","Part-time")) %>%
ggplot()+
geom_histogram(aes(x=FeelingAboutWelfareRecipients))+
facet_wrap(~Employmentstatus)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2797 rows containing non-finite values (stat_bin).
Fulltime_data <- VoterData2019 %>%
filter(Employmentstatus=="Full-time")
sample(Fulltime_data$FeelingAboutWelfareRecipients,40) %>%
mean(na.rm=TRUE)
## [1] 43.33333
Fulltime_data <- VoterData2019 %>%
filter(Employmentstatus=="Full-time")
sample(Fulltime_data$FeelingAboutWelfareRecipients,40) %>%
mean()
## [1] NA
replicate(10000,sample(Fulltime_data$FeelingAboutWelfareRecipients,40)%>%mean(na.rm=TRUE)) %>%
data.frame() %>%
rename("mean"=1)%>%
ggplot()+
geom_histogram(aes(x=mean),fill="red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Parttime_data <- VoterData2019 %>%
filter(Employmentstatus=="Part-time")
sample(Parttime_data$FeelingAboutWelfareRecipients,40) %>%
mean(na.rm=TRUE)
## [1] 60.8
Parttime_data <- VoterData2019 %>%
filter(Employmentstatus=="Part-time")
sample(Parttime_data$FeelingAboutWelfareRecipients,40) %>%
mean()
## [1] NA
replicate(10000,sample(Parttime_data$FeelingAboutWelfareRecipients,40)%>%mean(na.rm=TRUE)) %>%
data.frame() %>%
rename("mean"=1)%>%
ggplot()+
geom_histogram(aes(x=mean),fill="blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
Newdata <- VoterData2019 %>%
filter(Employmentstatus %in% c("Full-time","Part-time"))
t.test(FeelingAboutWelfareRecipients~Employmentstatus, data = Newdata)
##
## Welch Two Sample t-test
##
## data: FeelingAboutWelfareRecipients by Employmentstatus
## t = -0.80625, df = 152.08, p-value = 0.4214
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.986139 3.357118
## sample estimates:
## mean in group Full-time mean in group Part-time
## 48.75153 51.06604
The p-value shows that there isn’t a statistically significant difference between full-time and part-time respondents that differ their feelings towards welfare recipients.
According to my analysis, most full-time employers are somewhat dissatisfied with their job however they are very satisfied with their income. Most part-time employers didn’t indicate neither they’re satisfied nor dissatisfied but they are very dissatisfied with their income. Afterall, it is accurate that part time employees would have that response since they are working under 40 hours a week. As for how they feel about welfare recipients, both full-time and part-time employers almost feel the same.