# Load the passenger data from a CSV file in the current working directory.
titanic <- read.csv("Titanic Data.csv")
# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a data viewer / display and can fail when the script is run in batch mode.
if (interactive()) {
  View(titanic)
}
# Total number of rows (passenger records) in the data set.
count <- nrow(titanic)
count
## [1] 889
# Sum of the Survived column. This equals the number of survivors only if the
# column is coded 0/1 -- TODO confirm the coding.
# Earlier alternatives, kept for reference:
#   aggregate(titanic$Survived, by = list(titanic$Survived == 1), sum)
#   count <- table(titanic$Survived)
# NOTE: `count` is reused here and no longer holds the row count.
count <- sum(titanic[["Survived"]])
count
## [1] 340
# Mean of the Survived column expressed as a percentage; for a 0/1 column this
# is the overall survival rate (assumes 0/1 coding -- TODO confirm).
percent_sur <- 100 * mean(titanic[["Survived"]])
percent_sur
## [1] 38.24522
# Contingency table of counts: Survived in rows, Pclass in columns.
table1 <- xtabs(~ Survived + Pclass, data = titanic)
# Count in the second Survived row and first Pclass column (presumably
# survivors in first class -- verify against the level order).
table1[2, 1]
## [1] 134
# Column-wise proportions: every Pclass column sums to 1.
table2 <- prop.table(table1, margin = 2)
# table2
# Same cell as above, as a percentage of its Pclass column.
100 * table2[2, 1]
## [1] 62.61682
# Three-way contingency table of counts: Survived x Pclass x Sex.
table3 <- xtabs(~ Survived + Pclass + Sex, data = titanic)
# Flat two-dimensional layout of the three-way table; with ftable() defaults
# the last variable (Sex) forms the columns and the others form the rows.
table4 <- ftable(table3)
# table4
# Fourth row, first column of the flat table (presumably female survivors in
# first class -- verify against the printed table4, indexing is positional).
table4[4, 1]
## [1] 89
# Contingency table of counts: Survived in rows, Sex in columns.
# Built once and reused for both sets of proportions below.
table5 <- xtabs(~ Survived + Sex, data = titanic)

# Row-wise proportions (each Survived row sums to 1): share of the first Sex
# level within the second Survived row (presumably the percentage of
# survivors who were female -- verify the level order).
table6 <- prop.table(table5, margin = 1)
table6[2, 1] * 100
## [1] 67.94118

# Column-wise proportions (each Sex column sums to 1): share of the second
# Survived row within the first Sex level (presumably the percentage of
# females who survived -- verify the level order).
# NOTE: `table6` is deliberately overwritten, as in the original script.
table6 <- prop.table(table5, margin = 2)
table6[2, 1] * 100
## [1] 74.03846
# Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived it.
# addmargins(table2)
# Contingency table of counts with Sex in rows and Survived in columns.
table7 <- xtabs(~ Sex + Survived, data = titanic)
# Pearson chi-squared test on the table. It tests for association between the
# two variables and is not directional.
chisq.test(x = table7)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table7
## X-squared = 258.43, df = 1, p-value < 2.2e-16
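# Since p < 0.01, the null hypothesis of independence is rejected, i.e. the two variables, Sex and Survived, are not independent (they are associated). The chi-squared test itself is non-directional; the direction of the difference comes from the survival proportions computed above.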