Read the Titanic dataset

# Load the passenger data. The file name is passed directly (the former
# paste(..., sep="") wrapper around a single string did nothing).
# stringsAsFactors = TRUE keeps Sex and Embarked as factors on R >= 4.0 too,
# which is what the str() output further down shows.
titanic <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)
# Preview the first rows
head(titanic)
##   Survived Pclass    Sex  Age SibSp Parch    Fare Embarked
## 1        0      3   male 22.0     1     0  7.2500        S
## 2        1      1 female 38.0     1     0 71.2833        C
## 3        1      3 female 26.0     0     0  7.9250        S
## 4        1      1 female 35.0     1     0 53.1000        S
## 5        0      3   male 35.0     0     0  8.0500        S
## 6        0      3   male 29.7     0     0  8.4583        Q

Random selection of rows using car::some() function

# Attach car, then print a random sample of rows with some()
library("car")
some(x = titanic)
##     Survived Pclass    Sex  Age SibSp Parch    Fare Embarked
## 40         1      3 female 14.0     1     0 11.2417        C
## 152        0      3   male 55.5     0     0  8.0500        S
## 203        0      3   male 45.5     0     0  7.2250        C
## 226        1      2   male 19.0     0     0 10.5000        S
## 328        1      3 female 31.0     1     1 20.5250        S
## 552        0      3   male 29.7     0     0  7.8292        Q
## 563        0      3   male 29.7     0     0  8.0500        S
## 571        1      1 female 53.0     2     0 51.4792        S
## 646        0      3   male 19.0     0     0  7.8958        S
## 682        0      3   male 20.0     0     0  9.2250        S

Structure of the data frame

# Compact overview of every column: type plus first few values
utils::str(titanic)
## 'data.frame':    889 obs. of  8 variables:
##  $ Survived: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass  : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : num  22 38 26 35 35 29.7 54 2 27 14 ...
##  $ SibSp   : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch   : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Embarked: Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...

Summary Statistics of the data

# NOTE(review): attach() puts copies of the columns on the search path. The
# later which() call on bare `Survived` / `Pclass` reads those copies, so this
# line cannot be dropped on its own. attach() is generally discouraged in
# favour of titanic$col or with(titanic, ...).
attach(titanic)  
# describe() below is expected to come from psych
library(psych)
# Per-column descriptive statistics for the whole data frame
describe(titanic)   
##           vars   n  mean    sd median trimmed   mad min    max  range
## Survived     1 889  0.38  0.49   0.00    0.35  0.00 0.0   1.00   1.00
## Pclass       2 889  2.31  0.83   3.00    2.39  0.00 1.0   3.00   2.00
## Sex*         3 889  1.65  0.48   2.00    1.69  0.00 1.0   2.00   1.00
## Age          4 889 29.65 12.97  29.70   29.22  9.34 0.4  80.00  79.60
## SibSp        5 889  0.52  1.10   0.00    0.27  0.00 0.0   8.00   8.00
## Parch        6 889  0.38  0.81   0.00    0.19  0.00 0.0   6.00   6.00
## Fare         7 889 32.10 49.70  14.45   21.28 10.24 0.0 512.33 512.33
## Embarked*    8 889  2.54  0.79   3.00    2.67  0.00 1.0   3.00   2.00
##            skew kurtosis   se
## Survived   0.48    -1.77 0.02
## Pclass    -0.63    -1.27 0.03
## Sex*      -0.62    -1.61 0.02
## Age        0.43     0.96 0.43
## SibSp      3.68    17.69 0.04
## Parch      2.74     9.66 0.03
## Fare       4.79    33.23 1.67
## Embarked* -1.26    -0.23 0.03

Dimension of the data frame

# Number of rows followed by number of columns
c(nrow(titanic), ncol(titanic))
## [1] 889   8

Q1a. How many passengers were on board the Titanic?

# One row per passenger, so the row count is the passenger count
nrow(titanic)
## [1] 889

Q1b. How many passengers survived the sinking of the Titanic?

# Count the rows flagged as survived; missing flags are not counted,
# just as subset() would drop them
sum(titanic$Survived == 1, na.rm = TRUE)
## [1] 340

Q1c. Create a one-way contingency table of the Titanic passengers based on who survived and who died.

# Treat the 0/1 survival flag as a categorical variable from here on
titanic[["Survived"]] <- as.factor(titanic[["Survived"]])
# One-way table: how many passengers died (0) and how many survived (1)
survivedTable <- table(titanic[["Survived"]])
print(survivedTable)
## 
##   0   1 
## 549 340

Q1d. What was the percentage of passengers who survived the sinking of the Titanic?

# Share of passengers in each outcome: counts divided by their total
prop <- survivedTable / sum(survivedTable)

# Scale the shares to percentages and display them
propPer <- 100 * prop
propPer
## 
##        0        1 
## 61.75478 38.24522
# Percentage of passengers who survived. The entry is looked up by its label
# (Survived = "1") so the result does not depend on the order of the levels.
propPer["1"]
##        1 
## 38.24522

Q2a. Create a two-way contingency table characterising the passengers based on survival and based on the passenger class.

# Two-way counts: survival outcome (rows) by passenger class (columns)
mytab <- xtabs(formula = ~ Survived + Pclass, data = titanic)
# Same table with row, column and grand totals appended
addmargins(A = mytab)
##         Pclass
## Survived   1   2   3 Sum
##      0    80  97 372 549
##      1   134  87 119 340
##      Sum 214 184 491 889

Q2b. Visualize your table using a Bar-plot.

titanic$Pclass <- as.factor(titanic$Pclass)
# Grouped bar chart of the counts in `mytab`: for every passenger class,
# one bar for those who died and one for those who survived.
barplot(mytab,
        main = "Survival by Passenger Class",
        xlab = "Passenger Class", ylab = "Frequency",
        col = c("grey", "black"),
        # the argument is `legend.text`; `legend` only worked through
        # partial argument matching
        legend.text = c("Died", "Survived"),
        beside = TRUE)

Q2b. Visualize your table using a Bar-plot.

Q2c. How many first-class passengers survived the sinking of the Titanic?

# subset() approach: keep rows that are both survivors and first class,
# then count them
nrow(subset(x = titanic, subset = Survived == 1 & Pclass == 1))
## [1] 134
# which() approach. The columns are read from the data frame itself instead of
# through bare names: bare `Survived` / `Pclass` only resolved because of an
# earlier attach(), whose copies were made before the columns were converted
# to factors.
length(which(titanic$Survived == 1 & titanic$Pclass == 1))
## [1] 134

Q2d. What was the percentage of first-class passengers who survived the sinking of the Titanic?

# Cross-tabulate survival outcome against passenger class
surviversByClass <- xtabs(formula = ~ Survived + Pclass, data = titanic)

# Column-wise proportions: within each class, the share that died / survived
prop.table(surviversByClass, margin = 2)
##         Pclass
## Survived         1         2         3
##        0 0.3738318 0.5271739 0.7576375
##        1 0.6261682 0.4728261 0.2423625
# Percentage of first-class passengers who survived. The cell is addressed by
# its labels (Survived = "1", Pclass = "1") rather than by row/column position,
# so a change in level order cannot silently pick another cell.
100 * prop.table(surviversByClass, margin = 2)["1", "1"]
## [1] 62.61682

Q3a. Create a three-way contingency table showing the number of passengers based on the passengers Survived, Sex and Passenger Class.

# Three-way counts. Pclass is the last factor, so the table prints as one
# Survived x Sex slice per passenger class.
mytable1 <- xtabs(formula = ~ Survived + Sex + Pclass, data = titanic)
# Append "Sum" margins along every dimension
addmargins(A = mytable1)

Q3a. Create a three-way contingency table showing the number of passengers based on the passengers Survived, Sex and Passenger Class.

## , , Pclass = 1
## 
##         Sex
## Survived female male Sum
##      0        3   77  80
##      1       89   45 134
##      Sum     92  122 214
## 
## , , Pclass = 2
## 
##         Sex
## Survived female male Sum
##      0        6   91  97
##      1       70   17  87
##      Sum     76  108 184
## 
## , , Pclass = 3
## 
##         Sex
## Survived female male Sum
##      0       72  300 372
##      1       72   47 119
##      Sum    144  347 491
## 
## , , Pclass = Sum
## 
##         Sex
## Survived female male Sum
##      0       81  468 549
##      1      231  109 340
##      Sum    312  577 889

Q3a. Create a three-way contingency table showing the number of passengers based on the Survived, Sex and Passenger Class.

# Flatten the three-way counts into a single two-dimensional display
ftable(x = mytable1)
##                 Pclass   1   2   3
## Survived Sex                      
## 0        female          3   6  72
##          male           77  91 300
## 1        female         89  70  72
##          male           45  17  47

Q3b. Express the previous question in percentages.

# Each cell as a share of all passengers, laid out as a flat table
ftab <- ftable(prop.table(mytable1))

# Shares expressed as percentages
ftabPer <- 100 * ftab

# Display with two decimal places
round(ftabPer, digits = 2)
##                 Pclass     1     2     3
## Survived Sex                            
## 0        female         0.34  0.67  8.10
##          male           8.66 10.24 33.75
## 1        female        10.01  7.87  8.10
##          male           5.06  1.91  5.29

Q3a. Create a three-way contingency table showing the number of passengers based on the Survival, Passenger Class and Sex.

# Three-way counts with Sex as the last factor, so the table prints as one
# Survived x Pclass slice per sex.
mytable2 <- xtabs(formula = ~ Survived + Pclass + Sex, data = titanic)
# Append "Sum" margins along every dimension
addmargins(A = mytable2)

Q3a. Create a three-way contingency table showing the number of passengers based on the Survival, Passenger Class and Sex.

## , , Sex = female
## 
##         Pclass
## Survived   1   2   3 Sum
##      0     3   6  72  81
##      1    89  70  72 231
##      Sum  92  76 144 312
## 
## , , Sex = male
## 
##         Pclass
## Survived   1   2   3 Sum
##      0    77  91 300 468
##      1    45  17  47 109
##      Sum 122 108 347 577
## 
## , , Sex = Sum
## 
##         Pclass
## Survived   1   2   3 Sum
##      0    80  97 372 549
##      1   134  87 119 340
##      Sum 214 184 491 889

Q3a. Create a three-way contingency table showing the number of passengers based on the Survival, Passenger Class and Sex.

# Flatten the three-way counts into a single two-dimensional display
ftable(x = mytable2)
##                 Sex female male
## Survived Pclass                
## 0        1               3   77
##          2               6   91
##          3              72  300
## 1        1              89   45
##          2              70   17
##          3              72   47

Q3b. Express the previous question in percentages.

# Each cell as a share of all passengers, laid out as a flat table
ftab <- ftable(prop.table(mytable2))

# Shares expressed as percentages
ftabPer <- 100 * ftab

# Display with two decimal places
round(ftabPer, digits = 2)
##                 Sex female  male
## Survived Pclass                 
## 0        1            0.34  8.66
##          2            0.67 10.24
##          3            8.10 33.75
## 1        1           10.01  5.06
##          2            7.87  1.91
##          3            8.10  5.29

Challenge Question C1: Visualize your table using Barplot.

# Number of passengers who died / survived in each passenger class,
# drawn as one panel per sex (these are counts, not percentages).
tab2 <- xtabs(~ Survived + Pclass + Sex, data = titanic)
sexLevels <- dimnames(tab2)$Sex

# Remember the previous graphics settings so the side-by-side layout does not
# leak into plots drawn later.
oldPar <- par(mfrow = c(1, 2), mar = c(4, 4, 3, 3))
for (i in seq_along(sexLevels)) {
  barplot(tab2[, , i],
          main = sexLevels[i],
          beside = TRUE,
          ylim = c(0, 400),
          col = c("grey", "black"),
          # only the left-most panel carries the y-axis label
          ylab = if (i == 1) "Number of Passengers" else NULL,
          # `legend.text` spelled out; `legend` relied on partial matching
          legend.text = c("Died", "Survived"),
          args.legend = list(x = "topleft"))
}
par(oldPar)

Challenge Question C1: Visualize your table using Barplot.

Challenge Question C1: Visualize your table using Barplot.

# Number of passengers who died / survived for each sex,
# drawn as one panel per passenger class (these are counts, not percentages).
tab2 <- xtabs(~ Survived + Sex + Pclass, data = titanic)
classLevels <- dimnames(tab2)$Pclass

# Remember the previous graphics settings so the three-panel layout does not
# leak into plots drawn later.
oldPar <- par(mfrow = c(1, 3), mar = c(8, 4, 3, 3))
for (i in seq_along(classLevels)) {
  barplot(tab2[, , i],
          main = classLevels[i],
          beside = TRUE,
          ylim = c(0, 350),
          col = c("grey", "black"),
          # only the left-most panel carries the y-axis label
          ylab = if (i == 1) "Number of Passengers" else NULL,
          # `legend.text` spelled out; `legend` relied on partial matching
          legend.text = c("Died", "Survived"),
          args.legend = list(x = "topleft"))
}
par(oldPar)

Challenge Question C1: Visualize your table using Barplot.

Q3c. How many Females traveling by First-Class survived the sinking of the Titanic?

# Females travelling first class who survived. The cell is looked up by its
# labels (Survived = "1", Sex = "female", Pclass = "1") instead of by its
# position inside the flattened table, which is easy to get wrong.
mytable1["1", "female", "1"]
## [1] 89

Q3d. What was the percentage of survivors who were female?

# Two-way counts: survival outcome (rows) by sex (columns)
surviversBySex <- xtabs(formula = ~ Survived + Sex, data = titanic)
print(surviversBySex)
##         Sex
## Survived female male
##        0     81  468
##        1    231  109
# Row-wise proportions: within each survival outcome, the share of each sex
propSur <- prop.table(surviversBySex, margin = 1)

# Same figures as percentages
propSurPer <- 100 * propSur

# Percentage of survivors who were female, addressed by label
# (Survived = "1", Sex = "female") rather than by cell position
propSurPer["1", "female"]
## [1] 67.94118

Challenge Question C2: Visualize your answer in Q3d using a Pie-chart.

# Split of the survivors by sex, taken from the row percentages of the
# Survived x Sex table instead of being typed in by hand.
slices <- unname(100 * prop.table(surviversBySex, 1)["1", c("female", "male")])
# The second slice is the share of survivors who were male; it was previously
# mislabelled as "Female who died".
lbls <- c("Female survivors", "Male survivors")

# Whole-number percentages appended to the labels, e.g. "Female survivors 68%"
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(lbls, " ", pct, "%")
pie(slices, labels = lbls, col = c("grey", "black"),
    main = "Pie Chart with Percentages")

Challenge Question C2: Visualize your answer in Q3d using a Pie-chart.

Q3e. What was the percentage of females on board the Titanic who survived?

# proportion
propSur2 <- prop.table(surviversBySex,2)

# percentage
propSur2Per <- propSur2*100

# percentage of total females on the Titanic who survived
propSur2Per
##         Sex
## Survived   female     male
##        0 25.96154 81.10919
##        1 74.03846 18.89081

Challenge Question C3: Visualize your answer in Q3e using a Pie-chart.

# Outcome split among female passengers (survived first, then died), taken
# from the column percentages of the Survived x Sex table instead of being
# typed in by hand.
slices <- unname(100 * prop.table(surviversBySex, 2)[c("1", "0"), "female"])
lbls <- c("Total female survivers", "Total female who died")

# Whole-number percentages appended to the labels
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(lbls, " ", pct, "%")
pie(slices, labels = lbls, col = c("grey", "black"),
    main = "Pie Chart with Percentages")

Challenge Question C3: Visualize your answer in Q3e using a Pie-chart.

Q4a. Use a Pearson’s Chi-squared test to evaluate whether the proportion of females who survived was larger than the proportion of males who survived?

# Pearson chi-squared test on the 2 x 2 Survived x Sex table
# (continuity correction left at its default, written out explicitly)
chisq.test(surviversBySex, correct = TRUE)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  surviversBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16

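The test rejects independence between sex and survival (X-squared = 258.43, df = 1, p-value < 2.2e-16). The chi-squared test itself is non-directional; the direction comes from the proportions computed in Q3e, where 74.04% of females survived compared with 18.89% of males. The proportion of females who survived was therefore significantly higher than the proportion of males who survived.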

Q4b. What is the p-value of the previous Pearson’s Chi-squared test?

# Pull the p-value component out of the test result
chisq.test(surviversBySex)[["p.value"]]
## [1] 3.77991e-58

The p-value of the previous Pearson’s Chi-squared test is \(3.77991 \times 10^{-58}\).

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).

library("vcd")
# Mosaic split first by passenger class, then sex, then survival outcome
mosaic(~ Pclass + Sex + Survived,
       data = titanic,
       legend = TRUE,
       shade = TRUE)

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).

## Loading required package: grid

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).

library("vcd")
# Mosaic split first by sex, then passenger class, then survival outcome
mosaic(~ Sex + Pclass + Survived,
       data = titanic,
       legend = TRUE,
       shade = TRUE)

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).

library("vcd")
# Mosaic split first by survival outcome, then passenger class, then sex
mosaic(~ Survived + Pclass + Sex,
       data = titanic,
       legend = TRUE,
       shade = TRUE)

Challenge Question C4: Create a Mosaic Plot of Titanic survivors and nonsurvivors based on gender (male/female), passenger class (First/Second/Third).