require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the survey extract (columns: gender, lifesat) and keep it as a tibble.
# as_tibble() is used because dplyr has deprecated tbl_df().
midterm_df <- as_tibble(
  read.csv(file = "http://www.personal.psu.edu/dlp/WFED540/pwces.csv")
)
# Compact overview: one line per column with its type and first values.
glimpse(midterm_df)
## Observations: 756
## Variables: 2
## $ gender (int) 1, 1, 0, 0, 0, 0, 1, NA, 0, 0, 1, 0, 1, 0, 1, 1, NA, 0...
## $ lifesat (int) 20, 18, 25, 7, 23, 25, 22, NA, 21, 29, 26, 26, 18, 28,...
# Per-column quartiles, mean, and count of missing values.
midterm_df %>% summary()
## gender lifesat
## Min. :0.0000 Min. : 5.0
## 1st Qu.:0.0000 1st Qu.:18.0
## Median :0.0000 Median :23.0
## Mean :0.4111 Mean :21.5
## 3rd Qu.:1.0000 3rd Qu.:25.0
## Max. :1.0000 Max. :30.0
## NA's :70 NA's :78
# male==0, female==1
#strongly disagree ==1
#moderately disagree==2
#slightly disagree ==3
#slightly agree ==4
#moderately agree ==5
#strongly agree ==6
#The five survey items –
#o “In most ways my life is close to ideal”
#o “The conditions of my life are excellent”
#o “I am satisfied with my life”
#o “So far I have gotten the important things I want in life”
#o “If I could live my life over, I would change almost nothing”
# Respondents per gender code (0 = male, 1 = female, NA = not reported).
# n() counts every row in the group, so no na.rm handling is needed; any extra
# `name = value` pair passed to summarize() would become an output column.
midterm_df %>%
  group_by(gender) %>%
  summarize(num = n())
## Source: local data frame [3 x 2]
##
## gender num
## (int) (int)
## 1 0 404
## 2 1 282
## 3 NA 70
# Mean life-satisfaction score per gender code; missing scores are ignored.
midterm_df %>%
  group_by(gender) %>%
  summarise(rate_satisfied = mean(lifesat, na.rm = TRUE))
## Source: local data frame [3 x 2]
##
## gender rate_satisfied
## (int) (dbl)
## 1 0 21.23869
## 2 1 21.83755
## 3 NA 24.66667
# Welch two-sample t-test (unequal variances, two-sided) of life satisfaction
# between the two gender codes; rows with a missing value are left out.
t.test(
  midterm_df$lifesat ~ midterm_df$gender,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: midterm_df$lifesat by midterm_df$gender
## t = -1.3392, df = 578.22, p-value = 0.181
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.4771301 0.2794267
## sample estimates:
## mean in group 0 mean in group 1
## 21.23869 21.83755
#1. The p-value associated with a t-value of -1.3392 is 0.181.
#2. This p-value of 0.181 is greater than alpha, 0.05.
#3. Therefore, we fail to reject the null hypothesis.
# The difference in mean life satisfaction between men and women was not statistically significant.
# Flag high life satisfaction. Note the coding: 0 = score of 25 or more (high),
# 1 = score below 25; a missing lifesat yields a missing highsat.
midterm_df <- midterm_df %>%
  mutate(highsat = ifelse(lifesat >= 25, 0, 1))
# Frequency table of the flag (missing values are not tabulated).
highsat <- table(midterm_df$highsat)
highsat
##
## 0 1
## 233 445
# Proportion of highly satisfied respondents by gender ----
# highsat == 0 marks a life-satisfaction score of 25 or more. Respondents with
# a missing score have a missing highsat, so they are left out of both the
# counts and the denominators below.

# Men (gender == 0): number highly satisfied.
# n() takes no na.rm argument; filter() already drops rows whose condition is NA.
malehighsat <- midterm_df %>%
  filter(gender == 0, highsat == 0) %>%
  summarize(num_male_highsat = n())
malehighsat
## Source: local data frame [1 x 1]
##
## num_male_highsat
## (int)
## 1 129
# Men with a recorded life-satisfaction score (the denominator).
male_answered <- midterm_df %>%
  filter(gender == 0, !is.na(highsat)) %>%
  summarize(num = n())
prop_malehighsat <- malehighsat$num_male_highsat / male_answered$num
prop_malehighsat

# Women (gender == 1): number highly satisfied.
femalehighsat <- midterm_df %>%
  filter(gender == 1, highsat == 0) %>%
  summarize(num_female_highsat = n())
femalehighsat
## Source: local data frame [1 x 1]
##
## num_female_highsat
## (int)
## 1 102
# Women with a recorded life-satisfaction score (the denominator).
female_answered <- midterm_df %>%
  filter(gender == 1, !is.na(highsat)) %>%
  summarize(num = n())
prop_femalehighsat <- femalehighsat$num_female_highsat / female_answered$num
prop_femalehighsat

# Two-sample test of equal proportions on the underlying counts. Comparing the
# two summary proportions with t.test() is not valid: each group would then
# consist of a single aggregated number rather than the respondents' data.
prop.test(
  x = c(malehighsat$num_male_highsat, femalehighsat$num_female_highsat),
  n = c(male_answered$num, female_answered$num)
)
# NOTE(review): re-run this section to capture the printed proportions and the
# prop.test() result; no output is recorded here because it has not been
# regenerated for this calculation.
# Reading the result:
# 1. Take the p-value reported by prop.test().
# 2. Compare it with alpha, 0.05.
# 3. If the p-value is greater than alpha, we fail to reject the null
#    hypothesis that men and women have the same proportion of highly
#    satisfied respondents; otherwise we reject it.