Load Data

library(readxl)
# Read the workbook at the path below into `mydata`.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# on a machine where that exact file exists.
mydata <- read_excel(
  path = "C:/Users/mattv/Desktop/ADEC 7301 Assignments/Week 1/Titanic/train.xlsx"
)

Set Alpha

# Significance level; the critical value computed below uses 1 - alpha / 2.
alpha <- 0.05

Calculate Zc for 95% Confidence Interval

# Two-sided critical value: the standard-normal quantile at 1 - alpha / 2.
Zc <- qnorm(p = 1 - alpha / 2, mean = 0, sd = 1)

Calculate Female Survival Rate

I loaded the dplyr package and used the filter function to keep only the rows for female passengers

library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows whose Sex value is "female".
female_data <- filter(mydata, Sex == "female")

# Sample size: number of rows in the female subset.
n_female <- nrow(female_data)
c("Number of total female passengers", n_female)
## [1] "Number of total female passengers" "314"
# Sum of the Survived column over the female subset
# (a survivor count, assuming Survived is coded 0/1 — confirm against the data).
female_survived <- sum(female_data[["Survived"]])
c("Number of female passengers who survived", female_survived)
## [1] "Number of female passengers who survived"
## [2] "233"
# Sample proportion of the female subset that survived.
phat_female <- female_survived / n_female
c("phat_female", phat_female)
## [1] "phat_female"      "0.74203821656051"

Calculate the Total Survival Rate

# Sample size: number of rows in the full data set.
Total_Passangers <- nrow(mydata)
c("Total number of passangers", Total_Passangers)
## [1] "Total number of passangers" "891"
# Number of rows whose Survived value matches "1".
Total_Survived <- mydata %>%
  filter(Survived == "1") %>%
  nrow()
c("Total number of Survivors", Total_Survived)
## [1] "Total number of Survivors" "342"
# Overall sample proportion, taken as the mean of the Survived column.
phat_total <- mean(mydata[["Survived"]])
c("phat_total", phat_total)
## [1] "phat_total"        "0.383838383838384"

Calculate the Standard Error for Female survival Rates

# Standard error of the female sample proportion: sqrt(p * (1 - p) / n).
Se_female <- sqrt((phat_female * (1 - phat_female)) / n_female)
c("Se_female", Se_female)
## [1] "Se_female"          "0.0246902790203467"

Calculate the Standard Error for Total Survival Rates

# Standard error of the overall sample proportion: sqrt(p * (1 - p) / n),
# with n taken as the row count of the full data set.
Se_total <- sqrt((phat_total * (1 - phat_total)) / nrow(mydata))
c("Se_total", Se_total)
## [1] "Se_total"         "0.01629231015825"

Calculate the Confidence Interval for Female Survival Rate

# Normal-approximation interval for the female proportion:
# point estimate minus/plus the critical value times the standard error.
lower_female <- phat_female - Zc * Se_female
upper_female <- phat_female + Zc * Se_female
c(lower_female, phat_female, upper_female)
## [1] 0.6936462 0.7420382 0.7904303

Confidence Interval using prop.test

# One-sample proportion test for the female survival rate (233 of 314).
# The confidence level is 0.95 so that the interval is comparable with the
# manual interval built from alpha = 0.05 and with the 95% interval requested
# for the total survival rate; the previous value of 0.98 produced a wider,
# non-comparable interval.
# TODO: re-knit; the printed output below was captured at the 98% level.
female_prop_test <- prop.test(x = 233, n = 314, conf.level = 0.95)
female_prop_test
## 
##  1-sample proportions test with continuity correction
## 
## data:  233 out of 314, null probability 0.5
## X-squared = 72.615, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 98 percent confidence interval:
##  0.6791630 0.7964867
## sample estimates:
##         p 
## 0.7420382

Calculate the Confidence Interval for Total Survival Rate

# Normal-approximation interval for the overall proportion:
# point estimate minus/plus the critical value times the standard error.
lower_total <- phat_total - Zc * Se_total
upper_total <- phat_total + Zc * Se_total
c(lower_total, phat_total, upper_total)
## [1] 0.3519060 0.3838384 0.4157707

Confidence Interval using prop.test

# One-sample proportion test for the overall survival rate (342 of 891),
# reporting a 95% confidence interval.
total_prop_test <- prop.test(
  x = 342,
  n = 891,
  conf.level = 0.95
)
total_prop_test
## 
##  1-sample proportions test with continuity correction
## 
## data:  342 out of 891, null probability 0.5
## X-squared = 47.627, df = 1, p-value = 5.154e-12
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.3519194 0.4167722
## sample estimates:
##         p 
## 0.3838384

Hypothesis Test

Here I ran a two-sample prop.test to test the hypothesis that the female survival rate is higher than the survival rate of the entire population:

H0: There is no meaningful difference between the survival rate of females and the total survival rate.

HA: The survival rate of females is higher than the total survival rate.

After running this prop.test, the outcome returned a p-value that was less than alpha. Given that outcome, we can reject H0 and conclude that the female survival rate is higher than the total survival rate.

# Two-sample test of equal proportions: females (233 of 314) against all
# passengers (342 of 891). The stated alternative is that the female rate is
# higher, so the test is one-sided ("greater" means prop 1 > prop 2) instead of
# the default two-sided alternative.
# NOTE(review): the female passengers are also counted among the 891, so the
# two groups overlap, while prop.test treats them as independent samples;
# comparing females with the remaining passengers would avoid this — confirm
# the intended design.
# TODO: re-knit; the printed output below was produced by the two-sided call.
hypothesis_test <- prop.test(
  x = c(233, 342),
  n = c(314, 891),
  alternative = "greater"
)
hypothesis_test
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(233, 342) out of c(314, 891)
## X-squared = 117.98, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.2980682 0.4183315
## sample estimates:
##    prop 1    prop 2 
## 0.7420382 0.3838384