# Attach dplyr. library() stops with an error when the package is not
# installed, whereas require() only returns FALSE and lets the script fail
# later with a less obvious message.
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the survey data (one row per respondent: gender and lifesat) from the
# course web site and keep it as a tibble.
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr.
midterm_df <- as_tibble(
  read.csv(file = "http://www.personal.psu.edu/dlp/WFED540/pwces.csv")
)

# Quick structural check: number of rows, column types, first values.
glimpse(midterm_df)
## Observations: 756
## Variables: 2
## $ gender  (int) 1, 1, 0, 0, 0, 0, 1, NA, 0, 0, 1, 0, 1, 0, 1, 1, NA, 0...
## $ lifesat (int) 20, 18, 25, 7, 23, 25, 22, NA, 21, 29, 26, 26, 18, 28,...

# Five-number summary per column; also reports how many values are missing.
summary(midterm_df)
##      gender          lifesat    
##  Min.   :0.0000   Min.   : 5.0  
##  1st Qu.:0.0000   1st Qu.:18.0  
##  Median :0.0000   Median :23.0  
##  Mean   :0.4111   Mean   :21.5  
##  3rd Qu.:1.0000   3rd Qu.:25.0  
##  Max.   :1.0000   Max.   :30.0  
##  NA's   :70       NA's   :78
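# Variable coding
# gender: male == 0, female == 1
#
# Response scale used for each survey item:
#   strongly disagree   == 1
#   moderately disagree == 2
#   slightly disagree   == 3
#   slightly agree      == 4
#   moderately agree    == 5
#   strongly agree      == 6
#
# The five survey items:
#   - “In most ways my life is close to ideal”
#   - “The conditions of my life are excellent”
#   - “I am satisfied with my life”
#   - “So far I have gotten the important things I want in life”
#   - “If I could live my life over, I would change almost nothing”
#
# NOTE(review): lifesat ranges from 5 to 30 in the summary above, which is
# consistent with it being the sum of the five item responses -- confirm
# against the survey codebook.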

# Number of respondents in each gender group (0 = male, 1 = female,
# NA = gender not reported).
# n() counts rows and takes no na.rm argument; writing `na.rm = TRUE` inside
# summarize() only created a constant column named `na.rm`, so it is removed.
midterm_df %>%
  group_by(gender) %>%
  summarize(num = n())
## Source: local data frame [3 x 2]
## 
##   gender   num
##    (int) (int)
## 1      0   404
## 2      1   282
## 3     NA    70
# Mean life-satisfaction score within each gender group, skipping missing
# scores. The result column is named rate_satisfied, but it holds a mean
# score rather than a rate.
summarise(
  group_by(midterm_df, gender),
  rate_satisfied = mean(lifesat, na.rm = TRUE)
)
## Source: local data frame [3 x 2]
## 
##   gender rate_satisfied
##    (int)          (dbl)
## 1      0       21.23869
## 2      1       21.83755
## 3     NA       24.66667
# Two-sided Welch t-test (unequal variances) of mean lifesat between the two
# gender groups. The arguments below are t.test()'s defaults, written out so
# the choice of test is visible in the code.
t.test(
  midterm_df$lifesat ~ midterm_df$gender,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  midterm_df$lifesat by midterm_df$gender
## t = -1.3392, df = 578.22, p-value = 0.181
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.4771301  0.2794267
## sample estimates:
## mean in group 0 mean in group 1 
##        21.23869        21.83755
#1. The p-value associated with a t-value of -1.3392 is 0.181.
#2. This p-value of 0.181 is greater than alpha, 0.05.
#3. Therefore, we fail to reject the null hypothesis. 
# Conclusion: the difference in mean life satisfaction between men and women is not statistically significant.

# Flag each respondent by satisfaction level.
# NOTE: the coding runs opposite to the name: highsat is 0 when
# lifesat >= 25 and 1 otherwise (NA when lifesat is missing).
midterm_df[["highsat"]] <- ifelse(midterm_df[["lifesat"]] >= 25, 0, 1)

# Frequency table of the flag; respondents with a missing flag are not counted.
highsat <- table(midterm_df[["highsat"]])
print(highsat)
## 
##   0   1 
## 233 445
# Compare the share of highly satisfied respondents (lifesat >= 25) between
# men (gender == 0) and women (gender == 1).
#
# Reminder: highsat is coded 0 = high satisfaction, 1 = otherwise, so the
# "high" group is highsat == 0 throughout.
#
# Corrections relative to the earlier version of this analysis:
#   * summarize(..., na.rm = 0) did not handle missing values; it only added
#     a constant column named `na.rm` to the result.
#   * The denominators 404 and 282 were hard-coded group sizes that include
#     respondents whose lifesat is missing and who therefore cannot be
#     classified. Counts and denominators are now taken from the data.
#   * t.test() was applied to two one-row data frames, so it compared the
#     pairs (proportion, 0) and (proportion, 0) -- not a test of proportions.
#     prop.test() on the counts is the two-sample test of equal proportions.

# Per gender: number of highly satisfied respondents and number of
# respondents with a known satisfaction flag.
highsat_counts <- midterm_df %>%
  filter(!is.na(gender), !is.na(highsat)) %>%
  group_by(gender) %>%
  summarize(n_high = sum(highsat == 0), n_total = n())
highsat_counts

# Counts of highly satisfied men and women.
# In the previously recorded run these were 129 (men) and 102 (women).
malehighsat <- highsat_counts$n_high[highsat_counts$gender == 0]
femalehighsat <- highsat_counts$n_high[highsat_counts$gender == 1]

# Matching denominators: respondents of each gender with non-missing lifesat.
n_male <- highsat_counts$n_total[highsat_counts$gender == 0]
n_female <- highsat_counts$n_total[highsat_counts$gender == 1]

# Proportion of highly satisfied respondents within each gender.
prop_malehighsat <- malehighsat / n_male
prop_malehighsat
prop_femalehighsat <- femalehighsat / n_female
prop_femalehighsat

# Two-sample test of equal proportions (H0: the proportion of highly
# satisfied respondents is the same for men and women).
prop.test(
  x = c(malehighsat, femalehighsat),
  n = c(n_male, n_female)
)

# Interpretation (alpha = 0.05):
# 1. Read the p-value reported by prop.test() above.
# 2. If the p-value is greater than 0.05, we fail to reject the null
#    hypothesis: the proportion of highly satisfied respondents does not
#    differ significantly between men and women. Otherwise we reject it.
# NOTE(review): the result recorded earlier (t = -0.08787, df = 1.9697,
# p-value = 0.9381) came from the invalid t.test() call described above and
# should not be reported; re-run this section to obtain the prop.test() output.