# Load the Titanic passenger data from the working directory.
# stringsAsFactors = TRUE makes the text columns (Sex, Embarked) factors, so
# summary() reports per-level counts as in the output recorded below; since
# R 4.0.0 read.csv() would otherwise leave them as character vectors.
titanic.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)

# The spreadsheet-style viewer is only useful in an interactive session, so
# skip it when the script is run in batch mode (e.g. via Rscript).
if (interactive()) {
  View(titanic.df)
}

# Number of rows (passengers) and columns (variables).
dim(titanic.df)
## [1] 889 8

# Per-column descriptive statistics.
summary(titanic.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
# Overall survival: count passengers by whether Survived equals 1
# (FALSE = did not survive, TRUE = survived).
mytable <- with(titanic.df, table(Survived == 1))

# Counts with the grand total appended.
addmargins(mytable)
##
## FALSE  TRUE   Sum
##   549   340   889

# The same split expressed as a percentage of all passengers.
prop <- prop.table(mytable)
prop * 100
##
##    FALSE     TRUE
## 61.75478 38.24522
# First class versus survival.
# Rows:    Pclass == 1   (FALSE = other classes, TRUE = first class)
# Columns: Survived == 1 (FALSE = did not survive, TRUE = survived)
mytable2 <- with(titanic.df, table(Pclass == 1, Survived == 1))

# Cell counts with row, column and grand totals.
addmargins(mytable2)
##
##         FALSE TRUE Sum
##   FALSE   469  206 675
##   TRUE     80  134 214
##   Sum     549  340 889

# Joint percentages: every cell is divided by the total number of
# passengers, so the four cells together add up to 100 (these are not
# within-class survival rates).
prop1 <- prop.table(mytable2)
prop1 * 100
##
##             FALSE      TRUE
##   FALSE 52.755906 23.172103
##   TRUE   8.998875 15.073116
# Three-way breakdown: survival by first-class status, split by sex.
# Rows:    Survived == 1 (FALSE = did not survive, TRUE = survived)
# Columns: Pclass == 1   (FALSE = other classes, TRUE = first class)
# Layers:  Sex
# Note the row/column roles are swapped relative to mytable2.
# The columns are passed as expressions rather than bare names so that the
# table dimensions stay unnamed, as in the output recorded below.
mytable3 <- table(
  titanic.df[["Survived"]] == 1,
  titanic.df[["Pclass"]] == 1,
  titanic.df[["Sex"]]
)

# Counts with totals along every dimension; the final "Sum" layer pools
# both sexes.
addmargins(mytable3)
## , , = female
##
##
##         FALSE TRUE Sum
##   FALSE    78    3  81
##   TRUE    142   89 231
##   Sum     220   92 312
##
## , , = male
##
##
##         FALSE TRUE Sum
##   FALSE   391   77 468
##   TRUE     64   45 109
##   Sum     455  122 577
##
## , , = Sum
##
##
##         FALSE TRUE Sum
##   FALSE   469   80 549
##   TRUE    206  134 340
##   Sum     675  214 889
# Test whether survival is independent of sex.
# Cross-tabulate Survived (rows) against Sex (columns).
surviversBySex <- xtabs(~ Survived + Sex, data = titanic.df)

# Run Pearson's chi-squared test once and keep the result, so the exact
# p-value can be read from the same object instead of re-running the test.
survivalBySexTest <- chisq.test(surviversBySex)
survivalBySexTest
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: surviversBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16

# The printed summary caps the p-value at "< 2.2e-16"; extract the exact value.
survivalBySexTest$p.value
## [1] 3.77991e-58