library(vcd)
2.b Read the data
Load the Titanic data file.
# Read the Titanic passenger records from the CSV file (path is relative to
# the working directory). The file name is passed directly: wrapping a single
# string in paste(..., sep = "") returns that same string, so it was redundant.
ti.df <- read.csv("Titanic Data.csv")
View the data
# Open the spreadsheet-style data viewer only when someone is at the console;
# in a non-interactive run (Rscript, knitting) there is nobody to inspect it.
if (interactive()) {
  View(ti.df)
}
3.a To count the total number of passengers on board the Titanic.
library(psych)
# Each row of ti.df is one passenger, so the row count is the passenger total.
# This replaces describe(ti.df$Age): its `n` only counts non-missing Age
# values and would under-count if any Age were NA.
nrow(ti.df)
## [1] 889
3.a (alternate way): count the total number of passengers by summing a frequency table.
# Tabulate the Survived column, then add up every cell of the table to obtain
# the overall passenger count.
mytable <- table(Survived = ti.df$Survived)
sum(mytable)
## [1] 889
3.b To count the number of passengers onboard who survived.
# One-way frequency table of the Survived column (1 = survived).
mytable <- table(Survived = ti.df$Survived)
print(mytable)
## Survived
## 0 1
## 549 340
3.c To measure the percentage of passengers who survived the sinking of the Titanic.
# Divide every cell by the table total and scale to a percentage
# (1 = survived).
mytable / sum(mytable) * 100
## Survived
## 0 1
## 61.75478 38.24522
3.d To count the number of first-class passengers who survived the sinking of the Titanic.
# Two-way contingency table: rows are Survived (1 = survived), columns are
# Pclass (1 = first class).
mytable <- xtabs(formula = ~ Survived + Pclass, data = ti.df)
print(mytable)
## Pclass
## Survived 1 2 3
## 0 80 97 372
## 1 134 87 119
3.e To measure the percentage of first-class passengers who survived the sinking of the Titanic.
# Total passenger count, shown for reference.
margin.table(mytable)
## [1] 889
# Normalise within each Pclass column (margin = 2) so that the cell for
# Pclass 1 / Survived 1 is the share of first-class passengers who survived.
# Without a margin, prop.table() divides by the grand total and instead
# reports first-class survivors as a share of all passengers.
prop.table(mytable, 2) * 100
## Pclass
## Survived 1 2 3
## 0 37.38318 52.71739 75.76375
## 1 62.61682 47.28261 24.23625
3.f To count the number of females from first class who survived the sinking of the Titanic.
# Three-way table of Survived by Sex by Pclass, flattened into a single
# display (1 = survived; Pclass 1 = first class).
mytable <- xtabs(formula = ~ Survived + Sex + Pclass, data = ti.df)
print(ftable(mytable))
## Pclass 1 2 3
## Survived Sex
## 0 female 3 6 72
## male 77 91 300
## 1 female 89 70 72
## male 45 17 47
3.g To measure the percentage of survivors who were female.
# Two-way contingency table: rows are Survived (1 = survived), columns are Sex.
mytable <- xtabs(~ Survived + Sex, data = ti.df)
# Total passenger count, shown for reference.
margin.table(mytable)
## [1] 889
# Normalise within each Survived row (margin = 1) so that the cell for
# Survived 1 / female is the share of survivors who were female. Without a
# margin, prop.table() divides by the grand total and instead reports female
# survivors as a share of all passengers.
prop.table(mytable, 1) * 100
## Sex
## Survived female male
## 0 14.75410 85.24590
## 1 67.94118 32.05882
3.h To measure the percentage of females on board the Titanic who survived (as a share of all females on board).
# Number of passengers of each sex (column totals).
margin.table(mytable, 2)
## Sex
## female male
## 312 577
# Normalise within each Sex column (margin = 2) and scale by 100 so the
# result is a percentage, as the question asks and as the other sections
# report (1 = survived).
prop.table(mytable, 2) * 100
## Sex
## Survived female male
## 0 25.96154 81.10919
## 1 74.03846 18.89081
3.i Run a Pearson’s Chi-squared test to test the following hypothesis:
Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.
# Cross-tabulate Sex (rows) against Survived (columns).
mytable <- xtabs(formula = ~ Sex + Survived, data = ti.df)
# Show the counts together with row and column totals as a quick review.
print(addmargins(mytable))
## Survived
## Sex 0 1 Sum
## female 81 231 312
## male 468 109 577
## Sum 549 340 889
# Chi-squared test of independence between Sex and Survived.
print(chisq.test(mytable))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16
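The Chi-squared test gives p < 2.2e-16 (well below 0.01), so we reject the null hypothesis that survival was independent of sex.
Together with the observed proportions (about 74% of females survived versus about 19% of males), the data support the hypothesis.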