# Load the Titanic passenger data ----
# The path is relative to the current working directory. The file name is
# passed directly; wrapping a single literal in paste(..., sep = "") is a no-op.
titanic.df <- read.csv("Titanic Data.csv")

# Number of records read from the file.
nrow(titanic.df)
## [1] 889

# One-way frequency table of the Survived indicator (0 / 1).
table(titanic.df$Survived)
##
## 0 1
## 549 340

# Same table built inside with(), so the printed header carries the
# variable name "Survived".
mytable <- with(titanic.df, table(Survived))
mytable # frequencies
## Survived
## 0 1
## 549 340
prop.table(mytable) # proportions
## Survived
## 0 1
## 0.6175478 0.3824522
prop.table(mytable) * 100 # percentages
## Survived
## 0 1
## 61.75478 38.24522
# Two-way contingency table: passenger class (rows) by survival (columns) ----
mytable <- xtabs(~ Pclass + Survived, data = titanic.df)
print(mytable) # cell counts
## Survived
## Pclass 0 1
## 1 80 134
## 2 97 87
## 3 372 119
# With margin = NULL every cell is divided by the grand total.
prop.table(mytable, margin = NULL) # share of all passengers
## Survived
## Pclass 0 1
## 1 0.08998875 0.15073116
## 2 0.10911136 0.09786277
## 3 0.41844769 0.13385827
100 * prop.table(mytable, margin = NULL) # the same shares as percentages
## Survived
## Pclass 0 1
## 1 8.998875 15.073116
## 2 10.911136 9.786277
## 3 41.844769 13.385827
# Three-way contingency table: class by sex, one layer per survival value ----
mytable <- xtabs(~ Pclass + Sex + Survived, data = titanic.df)
print(mytable) # cell counts
## , , Survived = 0
##
## Sex
## Pclass female male
## 1 3 77
## 2 6 91
## 3 72 300
##
## , , Survived = 1
##
## Sex
## Pclass female male
## 1 89 45
## 2 70 17
## 3 72 47
# With margin = NULL every cell is divided by the grand total across
# both layers.
prop.table(mytable, margin = NULL) # share of all passengers
## , , Survived = 0
##
## Sex
## Pclass female male
## 1 0.003374578 0.086614173
## 2 0.006749156 0.102362205
## 3 0.080989876 0.337457818
##
## , , Survived = 1
##
## Sex
## Pclass female male
## 1 0.100112486 0.050618673
## 2 0.078740157 0.019122610
## 3 0.080989876 0.052868391
100 * prop.table(mytable, margin = NULL) # the same shares as percentages
## , , Survived = 0
##
## Sex
## Pclass female male
## 1 0.3374578 8.6614173
## 2 0.6749156 10.2362205
## 3 8.0989876 33.7457818
##
## , , Survived = 1
##
## Sex
## Pclass female male
## 1 10.0112486 5.0618673
## 2 7.8740157 1.9122610
## 3 8.0989876 5.2868391
# Survival status (rows) by sex (columns) ----
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
print(mytable)
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# Row percentages (margin = 1): each survival row sums to 100, so the
# Survived = 1 row gives the percentage of survivors who were female.
100 * prop.table(mytable, margin = 1)
## Sex
## Survived female male
## 0 14.75410 85.24590
## 1 67.94118 32.05882
# Column percentages (margin = 2): each sex column sums to 100, so the
# female column gives the percentage of females on board who survived.
100 * prop.table(mytable, margin = 2)
## Sex
## Survived female male
## 0 25.96154 81.10919
## 1 74.03846 18.89081
Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.
# Test whether survival is associated with sex ----
mytable <- xtabs(~ Sex + Survived, data = titanic.df)
print(addmargins(mytable)) # counts with row and column totals appended
## Survived
## Sex 0 1 Sum
## female 81 231 312
## male 468 109 577
## Sum 549 340 889
# Chi-squared test on the 2 x 2 table. The continuity correction is
# requested explicitly (it is also the default), matching the output header.
chisq.test(mytable, correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16
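Since the p-value is less than 0.05, we reject the null hypothesis that survival was independent of sex. The data therefore support the stated hypothesis: a higher proportion of female passengers survived (231 of 312, about 74%) than of male passengers (109 of 577, about 19%).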
Pearson’s chi-squared test is a statistical test applied to categorical data to evaluate how likely it is that an observed difference between groups arose by chance alone.