#-----------Titanic Case Analysis-------------------
#1 Reading data
# Load the passenger records from the CSV file in the current working
# directory. The file name is passed directly: wrapping a single literal in
# paste(..., sep = "") added nothing.
titanic.df <- read.csv("Titanic Data.csv")
#2 Viewing data
# View() opens a spreadsheet-style viewer, which needs an interactive
# session; skip it when the script is run non-interactively (e.g. Rscript).
if (interactive()) {
  View(titanic.df)
}
#3 number of passengers on board
# dim() returns c(rows, columns); the row count is the number of passengers.
dim(titanic.df)
## [1] 889 8
#4 number of passengers who survived the sinking of the Titanic
# Tabulate Survived once (0 = did not survive, 1 = survived) and reuse the
# same table for both the counts and the percentages below.
mytable <- table(titanic.df$Survived)
mytable
##
## 0 1
## 549 340
#5 percentage of passengers who survived the sinking of the Titanic
# prop.table() converts the counts to proportions of the grand total.
prop <- prop.table(mytable)
propPer <- prop * 100
propPer
##
## 0 1
## 61.75478 38.24522
# Select the survivor share by its level name ("1") rather than by position,
# so the result does not depend on the order of the table's levels.
propPer["1"]
## 1
## 38.24522
#6 number of first class passengers who survived the sinking
# Cross-tabulate survival (rows) against passenger class (columns);
# addmargins() appends the row and column totals.
mytab <- xtabs(~ Survived + Pclass, data = titanic.df)
addmargins(mytab)
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# Cross-check the Survived = 1, Pclass = 1 cell by counting matching rows.
nrow(subset(titanic.df, Survived == 1 & Pclass == 1))
## [1] 134
#7 percentage of first class passengers who survived the sinking
# This is the same Survived x Pclass table as mytab, so reuse it instead of
# tabulating the data a second time.
surviversByClass <- mytab
# margin = 2 gives column proportions: each class column sums to 1.
prop.table(surviversByClass, 2)
## Pclass
## Survived 1 2 3
## 0 0.3738318 0.5271739 0.7576375
## 1 0.6261682 0.4728261 0.2423625
# Index by dimnames (Survived = "1", Pclass = "1") instead of by position.
100 * prop.table(surviversByClass, 2)["1", "1"]
## [1] 62.61682
#8 number of females from First-Class who survived the sinking of the Titanic
# Three-way contingency table with dimensions Survived x Sex x Pclass.
mytable1 <- xtabs(~ Survived + Sex + Pclass, data = titanic.df)
mytable1
## , , Pclass = 1
##
## Sex
## Survived female male
## 0 3 77
## 1 89 45
##
## , , Pclass = 2
##
## Sex
## Survived female male
## 0 6 91
## 1 70 17
##
## , , Pclass = 3
##
## Sex
## Survived female male
## 0 72 300
## 1 72 47
# Pick the cell by its dimnames (Survived = "1", Sex = "female",
# Pclass = "1") rather than by a linear position in the flattened table,
# which silently depends on the table's layout.
mytable1["1", "female", "1"]
## [1] 89
#9 percentage of survivors who were female
# Cross-tabulate survival (rows) against sex (columns).
surviversBySex <- xtabs(~ Survived + Sex, data = titanic.df)
surviversBySex
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# Number of female survivors, selected by dimnames instead of a linear index.
surviversBySex["1", "female"]
## [1] 231
# margin = 1 gives row proportions, so the Survived = "1" row describes how
# the survivors split between female and male.
propSur <- prop.table(surviversBySex, 1)
propSurPer <- propSur * 100
propSurPer["1", "female"]
## [1] 67.94118
#10 Chi-squared test
# Pearson's chi-squared test on the 2x2 Survived x Sex table, to check
# whether survival is independent of sex.
chisq.test(surviversBySex)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: surviversBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16