Import File

# Load the passenger data from a CSV file in the current working directory.
titanic <- read.csv("Titanic Data.csv")

View Data File

# Open the data in the spreadsheet-style viewer. View() needs a GUI, so it is
# only called in interactive sessions; batch runs skip this step.
if (interactive()) {
  View(titanic)
}

Count the number of passengers onboard

# Total number of passengers: each row of the data frame is treated as one
# passenger, so the row dimension is the head count.
count <- dim(titanic)[1L]
print(count)
## [1] 889

Count the number of passengers who survived

# Number of survivors. Counting the rows where Survived equals 1 (instead of
# summing the raw column) skips missing values rather than returning NA.
# Note: this reuses the name `count`, replacing the passenger total.
count <- sum(titanic$Survived == 1, na.rm = TRUE)
count
## [1] 340

The percentage of passengers who survived

# Percentage of passengers who survived: the mean of the logical indicator
# `Survived == 1`, scaled to percent. Missing values are excluded so a single
# NA does not turn the whole result into NA.
percent_sur <- mean(titanic$Survived == 1, na.rm = TRUE) * 100
percent_sur
## [1] 38.24522

Count the number of first-class passengers who survived

# Cross-tabulate survival status (rows) against passenger class (columns).
table1 <- xtabs(~ Survived + Pclass, data = titanic)
# Row 2 / column 1 = second Survived level within the first Pclass level,
# i.e. first-class survivors assuming 0/1 and 1/2/3 coding (TODO confirm).
print(table1[2L, 1L])
## [1] 134

The percentage of first-class passengers who survived

# Column-wise proportions of the Survived x Pclass table: each passenger-class
# column sums to 1.
table2 <- prop.table(table1, margin = 2)
# Second Survived level within the first Pclass column, expressed in percent.
100 * table2[2L, 1L]
## [1] 62.61682

Count the number of first-class female passengers who survived

# Three-way contingency table: Survived x Pclass x Sex.
table3 <- xtabs(~ Survived + Pclass + Sex, data = titanic)
# Flatten to two dimensions: rows are Survived/Pclass combinations (Pclass
# varying fastest), columns are the Sex levels.
table4 <- ftable(table3)
# Row 4 = second Survived level with the first Pclass level (given three
# passenger classes); column 1 = first Sex level (presumably female -- confirm).
table4[4L, 1L]
## [1] 89

The percentage of survivors who were female

# Survival status (rows) by sex (columns).
table5 <- xtabs(~ Survived + Sex, data = titanic)
# Row-wise proportions: each Survived row sums to 1, so the entries give the
# sex composition within each survival group.
table6 <- prop.table(table5, margin = 1)
# Second Survived level, first Sex level (presumably female -- confirm), in percent.
100 * table6[2L, 1L]
## [1] 67.94118

The percentage of female passengers who survived

# Survival status (rows) by sex (columns).
table5 <- xtabs(~ Survived + Sex, data = titanic)
# Column-wise proportions: each Sex column sums to 1, so the entries give the
# survival rate within each sex. This overwrites any earlier `table6`.
table6 <- prop.table(table5, margin = 2)
# Second Survived level within the first Sex column (presumably female --
# confirm), in percent.
100 * table6[2L, 1L]
## [1] 74.03846

Pearson’s Chi-squared test to test the following hypothesis:

Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.

# Contingency table of sex (rows) against survival status (columns).
table7 <- xtabs(~ Sex + Survived, data = titanic)
# Pearson's chi-squared test of independence between Sex and Survived.
# The test is two-sided: it detects an association, not its direction.
print(chisq.test(table7))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table7
## X-squared = 258.43, df = 1, p-value < 2.2e-16

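Since p < 0.01, the null hypothesis of independence is rejected, i.e. the two variables, Sex and Survived, are not independent. Note that the chi-squared test is non-directional: it shows that survival differed by sex, while the direction of the difference (a higher survival rate among females) comes from the observed proportions above (74.0% of female passengers survived, versus 38.2% of all passengers).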