October 25, 2018

Introduction

The purpose of this study is to analyse survival in the Titanic disaster by Class, covering the three passenger classes and the crew.

Two plots will be generated using plotly:

  • The first is a stacked bar chart breaking out the number of survivors vs non-survivors by Class
  • The second is a pair of pie charts showing, as percentages, how those who perished and those who survived break down by Class

Dataset Exploratory Analysis

# Preview the first three rows of the titanic data frame.
# NOTE(review): the creation of `titanic` is not shown in this document --
# confirm where it is built before re-running.
head(titanic,3)
##   Class  Sex   Age Survived Freq
## 1   1st Male Child       No    0
## 2   2nd Male Child       No    0
## 3   3rd Male Child       No   35
# Inspect the structure: four factor columns plus a numeric Freq count column.
str(titanic)
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

Titanic Survival Counts by Class

Titanic % Survival Rates by Class

Conclusion

The majority of the fatalities occurred among the crew and third class passengers, followed by second class passengers and relatively few first class passengers. This is most likely due to the fact that the lower classes and a significant number of crewmen were in the lower decks of the ship.

Appendix

#load libraries
library(ggplot2)
library(tidyr)
library(plotly)
library(dplyr)

Prepare Bar Chart Data

# Prepare data for plotting.
#
# Reshape to wide form: one numeric column per Class level, so each class can
# later be drawn as its own bar trace. The ordinal class columns are renamed
# by NAME (not by position) so the labels stay correct even if the column
# order of the reshaped table changes.
# NOTE: spread() is superseded by tidyr::pivot_wider(); it is kept here so the
# report still runs with the older tidyr it was written against.
titanic_wide <- spread(titanic, Class, Freq) %>%
  rename(First = `1st`, Second = `2nd`, Third = `3rd`)

# Group by Survived and sum every numeric (per-class) column, giving one row
# for "No" and one for "Yes".
titanic_summary <- titanic_wide %>%
  group_by(Survived) %>%
  summarize_if(is.numeric, sum)

# Bar chart input: the same summary plus a per-row Total across all classes.
# Built from titanic_summary rather than repeating the aggregation, and using
# vectorised addition so no rowwise grouping is left on the result.
titanic_plot <- titanic_summary %>%
  mutate(Total = First + Second + Third + Crew)

Plot Bar Chart

# Annotation spec that labels each stacked bar with its overall total:
# x / y place the label at the top of each Survived bar, yanchor = "bottom"
# rests the text on that point, and no arrow is drawn.
total_survival <- list(
  x = titanic_plot$Survived,
  y = titanic_plot$Total,
  text = as.character(titanic_plot$Total),
  yanchor = "bottom",
  showarrow = FALSE
)

# Stacked bar chart of counts by Survived, one trace per class.
# The first trace is created by plot_ly(); the remaining classes are added as
# further traces that inherit x = ~Survived and type = "bar".
t <- plot_ly(titanic_plot, x = ~Survived, y = ~First,
             type = "bar", name = "1st") %>%
  add_trace(y = ~Second, name = "2nd") %>%
  add_trace(y = ~Third, name = "3rd") %>%
  add_trace(y = ~Crew, name = "Crew") %>%
  layout(
    yaxis = list(title = "Count"),
    barmode = "stack",
    annotations = total_survival,
    # "x" is the plotly.js hovermode that compares all traces at the hovered
    # x position. The previous value "compare" is not an accepted hovermode,
    # so the chart silently fell back to the library default.
    hovermode = "x"
  )
t

Prepare Pie Chart Data

# Long-format (Class, Count) tables for the two pie charts: take the summary
# row for one Survived outcome, drop the Survived column, and stack the
# remaining per-class columns into Class / Count pairs.
survived <- titanic_summary %>%
  filter(Survived == "Yes") %>%
  select(-Survived) %>%
  gather(key = Class, value = Count)

perished <- titanic_summary %>%
  filter(Survived == "No") %>%
  select(-Survived) %>%
  gather(key = Class, value = Count)

Plot Pie Charts

# Two pie charts in one figure, each showing the share of every class:
# the left pie covers those who perished, the right pie those who survived.
# `domain` positions each pie in paper coordinates (fractions of the plot area).
plot_ly() %>%
  add_pie(
    data = perished, labels = ~Class, values = ~Count,
    name = "Count",
    domain = list(x = c(0.0, 0.25), y = c(0.4, 1.0)),
    textposition = "inside",
    textinfo = "label+percent"
  ) %>%
  add_pie(
    data = survived, labels = ~Class, values = ~Count,
    name = "Count",
    domain = list(x = c(0.6, 0.85), y = c(0.4, 1.0)),
    textposition = "inside",
    textinfo = "label+percent"
  ) %>%
  layout(
    showlegend = FALSE, autosize = TRUE,
    # Hide grid lines, zero lines and tick labels on both axes.
    xaxis = list(showgrid = FALSE, zeroline = FALSE,
                 showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE,
                 showticklabels = FALSE),
    # Titles for each pie, positioned in paper coordinates.
    # FALSE is spelled out because the F alias can be reassigned.
    annotations = list(
      list(x = 0.1, y = 1.0, text = "Perished",
           showarrow = FALSE, xref = "paper", yref = "paper"),
      list(x = 0.75, y = 1.0, text = "Survived",
           showarrow = FALSE, xref = "paper", yref = "paper")
    )
  )