# Load the Titanic passenger data from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Sex, Embarked) as factors on
# R >= 4.0, where the read.csv() default changed to FALSE; the str() output
# recorded further down shows both columns as factors.
titanic <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)
# preview the first six rows
head(titanic)
## Survived Pclass Sex Age SibSp Parch Fare Embarked
## 1 0 3 male 22.0 1 0 7.2500 S
## 2 1 1 female 38.0 1 0 71.2833 C
## 3 1 3 female 26.0 0 0 7.9250 S
## 4 1 1 female 35.0 1 0 53.1000 S
## 5 0 3 male 35.0 0 0 8.0500 S
## 6 0 3 male 29.7 0 0 8.4583 Q
library(car)
# random sample of rows (10, the default) for a quick look at the data
some(titanic, n = 10)
## Survived Pclass Sex Age SibSp Parch Fare Embarked
## 40 1 3 female 14.0 1 0 11.2417 C
## 152 0 3 male 55.5 0 0 8.0500 S
## 203 0 3 male 45.5 0 0 7.2250 C
## 226 1 2 male 19.0 0 0 10.5000 S
## 328 1 3 female 31.0 1 1 20.5250 S
## 552 0 3 male 29.7 0 0 7.8292 Q
## 563 0 3 male 29.7 0 0 8.0500 S
## 571 1 1 female 53.0 2 0 51.4792 S
## 646 0 3 male 19.0 0 0 7.8958 S
## 682 0 3 male 20.0 0 0 9.2250 S
# compact overview of every column's type and leading values
str(object = titanic)
## 'data.frame': 889 obs. of 8 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ Age : num 22 38 26 35 35 29.7 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Embarked: Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...
# NOTE(review): attach() copies the columns of titanic onto the search path as
# they are at this point. Any later code that refers to a column by its bare
# name (without `titanic$`, `with()` or a `data =` argument) sees this
# snapshot, not subsequent modifications of titanic. Prefer explicit
# references; confirm nothing depends on the attached copy before removing.
attach(titanic)
library(psych)
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for every
# column; in the output below the factor columns appear with a trailing `*`.
describe(titanic)
## vars n mean sd median trimmed mad min max range
## Survived 1 889 0.38 0.49 0.00 0.35 0.00 0.0 1.00 1.00
## Pclass 2 889 2.31 0.83 3.00 2.39 0.00 1.0 3.00 2.00
## Sex* 3 889 1.65 0.48 2.00 1.69 0.00 1.0 2.00 1.00
## Age 4 889 29.65 12.97 29.70 29.22 9.34 0.4 80.00 79.60
## SibSp 5 889 0.52 1.10 0.00 0.27 0.00 0.0 8.00 8.00
## Parch 6 889 0.38 0.81 0.00 0.19 0.00 0.0 6.00 6.00
## Fare 7 889 32.10 49.70 14.45 21.28 10.24 0.0 512.33 512.33
## Embarked* 8 889 2.54 0.79 3.00 2.67 0.00 1.0 3.00 2.00
## skew kurtosis se
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.27 0.03
## Sex* -0.62 -1.61 0.02
## Age 0.43 0.96 0.43
## SibSp 3.68 17.69 0.04
## Parch 2.74 9.66 0.03
## Fare 4.79 33.23 1.67
## Embarked* -1.26 -0.23 0.03
# dimensions of the data: rows (passengers) x columns (variables)
dim(titanic)
## [1] 889 8
# total number of passengers = number of rows
nrow(titanic)
## [1] 889
# number of survivors: keep only the rows with Survived == 1 and count them
nrow(subset(titanic, subset = Survived == 1))
## [1] 340
# treat Survived as categorical from here on
titanic$Survived <- as.factor(titanic$Survived)
# one-way table: passengers who died (0) and who survived (1)
survivedTable <- table(titanic$Survived)
survivedTable
##
## 0 1
## 549 340
# shares of the total, first as proportions ...
prop <- prop.table(survivedTable)
# ... then scaled to percentages
propPer <- 100 * prop
propPer
##
## 0 1
## 61.75478 38.24522
# percentage of passengers who survived (level "1")
propPer["1"]
## 1
## 38.24522
# two-way table of survival (rows) by passenger class (columns)
mytab <- xtabs(~ Survived + Pclass, data = titanic)
# the same counts with row and column totals appended
addmargins(mytab)
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# treat passenger class as categorical from here on
titanic$Pclass <- as.factor(titanic$Pclass)
# grouped bar chart: died vs. survived counts within each passenger class.
# legend.text is spelled out in full; the abbreviated `legend =` only works
# through partial argument matching.
barplot(mytab,
        main = "Survival by Passenger Class",
        xlab = "Passenger Class", ylab = "Frequency",
        col = c("grey", "black"),
        legend.text = c("Died", "Survived"), beside = TRUE)
# number of first-class passengers who survived
# using subset() function
nrow(subset(titanic, Survived == 1 & Pclass == 1))
## [1] 134
# using which() function; with() evaluates the condition on the current
# columns of titanic instead of relying on bare names found on the search path
with(titanic, length(which(Survived == 1 & Pclass == 1)))
## [1] 134
# survival counts cross-classified by passenger class
surviversByClass <- xtabs(~ Survived + Pclass, data = titanic)
# proportions within each passenger class (every Pclass column sums to 1)
prop.table(surviversByClass, margin = 2)
## Pclass
## Survived 1 2 3
## 0 0.3738318 0.5271739 0.7576375
## 1 0.6261682 0.4728261 0.2423625
# percentage of first-class passengers who survived the sinking of the Titanic
# (row Survived = "1", column Pclass = "1")
100 * prop.table(surviversByClass, margin = 2)["1", "1"]
## [1] 62.61682
# three-way contingency table:
# Survived (rows) x Sex (columns), one layer per Passenger Class
mytable1 <- xtabs(formula = ~ Survived + Sex + Pclass, data = titanic)
# append totals along every dimension
addmargins(A = mytable1)
## , , Pclass = 1
##
## Sex
## Survived female male Sum
## 0 3 77 80
## 1 89 45 134
## Sum 92 122 214
##
## , , Pclass = 2
##
## Sex
## Survived female male Sum
## 0 6 91 97
## 1 70 17 87
## Sum 76 108 184
##
## , , Pclass = 3
##
## Sex
## Survived female male Sum
## 0 72 300 372
## 1 72 47 119
## Sum 144 347 491
##
## , , Pclass = Sum
##
## Sex
## Survived female male Sum
## 0 81 468 549
## 1 231 109 340
## Sum 312 577 889
# frequency counts, flattened into a single two-dimensional table
ftable(mytable1)
## Pclass 1 2 3
## Survived Sex
## 0 female 3 6 72
## male 77 91 300
## 1 female 89 70 72
## male 45 17 47
# each cell as a proportion of all passengers
ftab <- ftable(prop.table(mytable1))
# expressed as percentages
ftabPer <- 100 * ftab
# shown to 2 decimal places
round(ftabPer, digits = 2)
## Pclass 1 2 3
## Survived Sex
## 0 female 0.34 0.67 8.10
## male 8.66 10.24 33.75
## 1 female 10.01 7.87 8.10
## male 5.06 1.91 5.29
# three-way contingency table:
# Survived (rows) x Passenger Class (columns), one layer per Sex
mytable2 <- xtabs(formula = ~ Survived + Pclass + Sex, data = titanic)
# append totals along every dimension
addmargins(A = mytable2)
## , , Sex = female
##
## Pclass
## Survived 1 2 3 Sum
## 0 3 6 72 81
## 1 89 70 72 231
## Sum 92 76 144 312
##
## , , Sex = male
##
## Pclass
## Survived 1 2 3 Sum
## 0 77 91 300 468
## 1 45 17 47 109
## Sum 122 108 347 577
##
## , , Sex = Sum
##
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# frequency counts, flattened into a single two-dimensional table
ftable(mytable2)
## Sex female male
## Survived Pclass
## 0 1 3 77
## 2 6 91
## 3 72 300
## 1 1 89 45
## 2 70 17
## 3 72 47
# each cell as a proportion of all passengers
ftab <- ftable(prop.table(mytable2))
# expressed as percentages
ftabPer <- 100 * ftab
# shown to 2 decimal places
round(ftabPer, digits = 2)
## Sex female male
## Survived Pclass
## 0 1 0.34 8.66
## 2 0.67 10.24
## 3 8.10 33.75
## 1 1 10.01 5.06
## 2 7.87 1.91
## 3 8.10 5.29
# number of passengers who died / survived in each passenger class,
# drawn as two side-by-side panels (one per sex)
# par() returns the previous settings; they are restored after the panels so
# later plots do not inherit the two-panel layout
oldPar <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 3))
tab2 <- xtabs(~ Survived + Pclass + Sex, data = titanic)
# left panel: first level of Sex
barplot(tab2[, , 1],
        main = dimnames(tab2)$Sex[1],
        beside = TRUE,
        ylim = c(0, 400),
        col = c("grey", "black"),
        ylab = "Number of Passengers",
        legend.text = c("Died", "Survived"),
        args.legend = list(x = "topleft"))
# right panel: second level of Sex (no y-axis label; it shares the left one)
barplot(tab2[, , 2],
        main = dimnames(tab2)$Sex[2],
        beside = TRUE,
        ylim = c(0, 400),
        col = c("grey", "black"),
        legend.text = c("Died", "Survived"),
        args.legend = list(x = "topleft"))
par(oldPar)
# number of passengers who died / survived for each sex,
# drawn as one panel per passenger class
tab2 <- xtabs(~ Survived + Sex + Pclass, data = titanic)
classLevels <- dimnames(tab2)$Pclass
# par() returns the previous settings; they are restored after the panels so
# later plots do not inherit the multi-panel layout and enlarged bottom margin
oldPar <- par(mfrow = c(1, length(classLevels)), mar = c(8, 4, 3, 3))
for (k in seq_along(classLevels)) {
  barplot(tab2[, , k],
          main = classLevels[k],
          beside = TRUE,
          ylim = c(0, 350),
          col = c("grey", "black"),
          # only the leftmost panel carries the y-axis label
          ylab = if (k == 1) "Number of Passengers" else NULL,
          legend.text = c("Died", "Survived"),
          args.legend = list(x = "topleft"))
}
par(oldPar)
# number of females traveling first class who survived the sinking of the
# Titanic; the cell is selected by level name (Survived = "1", Sex = "female",
# Pclass = "1") rather than by its position in the flattened table
mytable1["1", "female", "1"]
## [1] 89
# survival counts cross-classified by sex
surviversBySex <- xtabs(~ Survived + Sex, data = titanic)
# frequency counts of survivors who were female
surviversBySex
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# row-wise proportions: within each survival outcome, the share of each sex
propSur <- prop.table(surviversBySex, 1)
# percentage
propSurPer <- propSur * 100
# percentage of survivors who were female
propSurPer[2, 1]
## [1] 67.94118
# Sex composition of the survivors, read from the table instead of retyped.
# The "1" row holds the female and male shares among survivors, so the second
# slice is the male survivors (it is not the share of females who died).
slices <- as.vector(propSurPer["1", ])
# label order follows the Sex columns of the table: female, male
lbls <- c("Female survivers", "Male survivers")
# percentage of survivors who were female and who were male
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(lbls, " ", pct, "%")
pie(slices, labels = lbls, col = c("grey", "black"),
    main = "Pie Chart with Percentages")
# column-wise proportions: within each sex, the share who died / survived
propSur2 <- prop.table(surviversBySex, 2)
# percentage
propSur2Per <- propSur2 * 100
# percentage of total females on the Titanic who survived
propSur2Per
## Sex
## Survived female male
## 0 25.96154 81.10919
## 1 74.03846 18.89081
# Outcome of the female passengers, read from the table instead of retyped:
# survived ("1") first, then died ("0"), to match the label order below.
slices <- as.vector(propSur2Per[c("1", "0"), "female"])
lbls <- c("Total female survivers", "Total female who died")
# percentage of total female who survived and who died
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(lbls, " ", pct, "%")
pie(slices, labels = lbls, col = c("grey", "black"),
    main = "Pie Chart with Percentages")
# Pearson chi-squared test of independence between Survived and Sex
# (Yates' continuity correction, the default for a 2 x 2 table, made explicit)
chisq.test(x = surviversBySex, correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: surviversBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16
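A higher percentage of female passengers survived than of male passengers (about 74% versus 19%), and the chi-squared test above indicates that this association between sex and survival is statistically significant.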
# exact p-value of the chi-squared test (the printed summary only reports
# it as "< 2.2e-16")
chisq.test(surviversBySex)[["p.value"]]
## [1] 3.77991e-58
The p-value of the preceding Pearson’s chi-squared test is \(3.77991 \times 10^{-58}\).
# vcd provides mosaic(); loading it once is enough for all three plots
library(vcd)
# Mosaic plots of the same three-way classification with the variables
# entered in different orders; shade = TRUE colours the tiles and
# legend = TRUE adds the key for that shading.
# Passenger Class, Sex and Survived
mosaic(~ Pclass + Sex + Survived, data = titanic, shade = TRUE, legend = TRUE)
## Loading required package: grid
# Sex, Passenger Class and Survived
mosaic(~ Sex + Pclass + Survived, data = titanic, shade = TRUE, legend = TRUE)
# Survived, Passenger Class and Sex
mosaic(~ Survived + Pclass + Sex, data = titanic, shade = TRUE, legend = TRUE)