Titanic1

Om Joy Halder
26-09-2017

Q1a: How many passengers were on board the Titanic?

 # Load the passenger records from the CSV in the working directory.
 # The file name is a single literal, so it is passed directly rather than
 # through a paste() call that has nothing to join.
 Titanic <- read.csv("Titanic Data.csv")
 # One row per record, so the row count answers "how many passengers".
 nrow(Titanic)
[1] 889

Q1b. How many passengers survived the sinking of the Titanic?

# Keep only the rows whose Survived flag equals 1, then count them.
# which() discards NA comparisons, so rows with a missing flag are not kept.
Titanic_Survivour <- Titanic[which(Titanic$Survived == 1), , drop = FALSE]
nrow(Titanic_Survivour)
[1] 340

Q1c. Create a one-way contingency table summarizing the Titanic passengers based on how many survived and how many died.

# Recode the 0/1 Survived flag as a labelled factor so tables read naturally.
Titanic$Survived.f <- factor(
  Titanic$Survived,
  levels = c(0, 1),
  labels = c("Perished", "Survived")
)
# One-way frequency table of the labelled outcome.
mytable <- table(Survived.f = Titanic$Survived.f)
mytable
Survived.f
Perished Survived 
     549      340 

Q1d. What was the percentage of passengers who survived the sinking of the Titanic?

# Each count as a share of the table total, scaled to a percentage.
mytable / sum(mytable) * 100
Survived.f
Perished Survived 
61.75478 38.24522 

Q2a. Create a two-way contingency table characterising the passengers based on survival and based on the passenger class.

# Re-create the labelled survival factor so this chunk runs on its own.
Titanic$Survived.f <- factor(
  Titanic$Survived,
  levels = c(0, 1),
  labels = c("Perished", "Survived")
)
# Cross-tabulate outcome (rows) against passenger class (columns).
mytable2 <- xtabs(~ Survived.f + Pclass, data = Titanic)
mytable2
          Pclass
Survived.f   1   2   3
  Perished  80  97 372
  Survived 134  87 119

Q2b. Visualize your table using a Bar plot

# Grouped bars: for every passenger class, one bar per row of mytable2.
barplot(
  mytable2,
  beside = TRUE,
  col = c("grey", "blue"),
  main = "Survival by Passenger Class",
  xlab = "Passenger Class",
  ylab = "Frequency"
)
# Legend swatches use the same two colours, in the same order, as the bars.
legend(
  "topleft",
  legend = c("Died", "Survived"),
  fill = c("grey", "blue")
)

plot of chunk unnamed-chunk-6

Q2c. How many first-class passengers survived the sinking of the Titanic?

# Count the rows that are both survivors and first class.
# na.rm = TRUE leaves out rows where the combined condition is NA.
sum(Titanic$Survived == 1 & Titanic$Pclass == 1, na.rm = TRUE)
[1] 134

Q2d. What was the percentage of first-class passengers who survived the sinking of the Titanic?

# Rebuild the survival-by-class table from the raw 0/1 flag
# (this overwrites the labelled mytable2 created earlier).
mytable2 <- xtabs(~ Survived + Pclass, data = Titanic)
# margin = 2 normalises within each column, i.e. within each passenger class.
100 * prop.table(mytable2, margin = 2)
        Pclass
Survived        1        2        3
       0 37.38318 52.71739 75.76375
       1 62.61682 47.28261 24.23625

Q3a. Create a three-way contingency table showing the number of passengers based on the passenger's class, gender and survival.

# Three-way count table: passenger class x sex x survival flag.
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
# Flatten the 3-D table into a single printable grid.
ftable(mytable3)
              Survived   0   1
Pclass Sex                    
1      female            3  89
       male             77  45
2      female            6  70
       male             91  17
3      female           72  72
       male            300  47

Q3b. Express Q3a. in percentages, displaying answers up to two decimal places.

# Same three-way table as before, rebuilt so this chunk runs on its own.
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
# Every cell as a percentage of the grand total, rounded to two decimals.
ftable(round(100 * prop.table(mytable3), digits = 2))
              Survived     0     1
Pclass Sex                        
1      female           0.34 10.01
       male             8.66  5.06
2      female           0.67  7.87
       male            10.24  1.91
3      female           8.10  8.10
       male            33.75  5.29

Visualize your table in Q3b using a bar plot.

# Survival counts by passenger class, tabulated separately for each sex.
f <- xtabs(~ Survived + Pclass, data = subset(Titanic, Sex == "female"))
m <- xtabs(~ Survived + Pclass, data = subset(Titanic, Sex == "male"))

# Draw the two panels side by side. par() hands back the previous settings,
# which are restored afterwards so later plots are not drawn into the same
# 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))

# Both panels share ylim = c(0, 400) so bar heights are directly comparable.
barplot(
  f,
  main = "Female", ylim = c(0, 400),
  xlab = "Passenger Class", ylab = "No of passengers",
  col = c("grey", "blue"), beside = TRUE
)
legend("topleft", fill = c("grey", "blue"), legend = c("Died", "Survived"))

barplot(
  m,
  main = "Male", ylim = c(0, 400),
  xlab = "Passenger Class", ylab = "No of passengers",
  col = c("grey", "blue"), beside = TRUE
)
legend("topleft", fill = c("grey", "blue"), legend = c("Died", "Survived"))

par(old_par)

plot of chunk unnamed-chunk-11

Q3c. How many Females traveling by First-Class survived the sinking of the Titanic?

# Count the rows that are survivors, first class and female all at once.
# na.rm = TRUE leaves out rows where the combined condition is NA.
sum(
  Titanic$Survived == 1 & Titanic$Pclass == 1 & Titanic$Sex == "female",
  na.rm = TRUE
)
[1] 89

Q3d. What was the percentage of survivors who were female?

# Survival flag (rows) against sex (columns).
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
# margin = 1 normalises within each row, so the Survived = 1 row gives the
# female/male split among survivors.
100 * prop.table(mytable4, margin = 1)
        Sex
Survived   female     male
       0 14.75410 85.24590
       1 67.94118 32.05882

C2. Visualize your answer in Q3d using a pie chart.

# Count survivors (Survived == 1) by sex.
survived <- xtabs(~ Sex, data = subset(Titanic, Survived == 1))
# Percentage share of each sex among survivors, to two decimals.
pct <- c(round(100 * prop.table(survived), 2))
# Slice labels of the form "<name> <percent>%".
lbls <- paste0(c("Female", "Male"), " ", pct, "%")
pie(
  survived,
  labels = lbls,
  col = c("blue", "grey"),
  main = "Who survived?"
)

plot of chunk unnamed-chunk-14

Q3e. What was the percentage of females on board the Titanic who survived?

# Survival flag (rows) against sex (columns).
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
# margin = 2 normalises within each column, so the female column gives the
# died/survived split among women.
100 * prop.table(mytable4, margin = 2)
        Sex
Survived   female     male
       0 25.96154 81.10919
       1 74.03846 18.89081

Challenge Question C3

Visualize your answer in Q3e using a Pie-chart.

# Count female passengers by survival flag.
female <- xtabs(~ Survived, data = subset(Titanic, Sex == "female"))
# Percentage share of each outcome among women, to two decimals.
pct <- c(round(100 * prop.table(female), 2))
# Slice labels of the form "<outcome> <percent>%".
lbls <- paste0(c("Died", "Survived"), " ", pct, "%")
pie(
  female,
  labels = lbls,
  col = c("red", "blue"),
  main = "Percentage of females on board the Titanic who survived"
)

plot of chunk unnamed-chunk-16

Q4a. Use a Pearson's Chi-squared test to evaluate whether the proportion of females who survived was larger than the proportion of males who survived.

# Survival-by-sex contingency table, rebuilt so this chunk runs on its own.
mytable4 <- xtabs(~ Survived + Sex, data = Titanic)
# Pearson chi-squared test of association between the two variables.
chisq.test(mytable4)

    Pearson's Chi-squared test with Yates' continuity correction

data:  mytable4
X-squared = 258.43, df = 1, p-value < 2.2e-16

Q4b. Create a Mosaic Plot of Titanic survivors and non-survivors based on gender (male/female) and passenger class (First/Second/Third).

library(vcd)
# Three-way class x sex x survival table used as the mosaic's input.
mytable3 <- xtabs(~ Pclass + Sex + Survived, data = Titanic)
# The formula fixes the order in which the variables split the mosaic.
mosaic(
  ~ Pclass + Sex + Survived,
  data = mytable3,
  shade = TRUE,
  legend = TRUE,
  main = "Mosaic Plot"
)

plot of chunk unnamed-chunk-18

Q5a. Create a one-way contingency table showing the average age of the survivors and the average age of those who died

# Mean age within each value of Survived (0 and 1).
# The formula uses Titanic$... terms instead of a data = argument, which is
# why the result's columns are named `Titanic$Survived` and `Titanic$Age`.
# NOTE(review): rows with a missing Age are presumably dropped by aggregate's
# formula interface before averaging - confirm against the documentation.
mytable5 <- aggregate(Titanic$Age ~ Titanic$Survived, FUN = mean)
mytable5
  Titanic$Survived Titanic$Age
1                0    30.41530
2                1    28.42382

Q5b. Create two boxplots, placed side-by-side, to visualize the distribution of the age of the survivors and the age of those who died.

# One box per value of Survived, drawn side by side on a shared Age axis.
boxplot(
  Age ~ Survived,
  data = Titanic,
  col = "lightblue",
  main = "Distribution of the age of the survivors ",
  xlab = "Survived?",
  ylab = "Age"
)

plot of chunk unnamed-chunk-20

Q5c. Run a t-test comparing the average age of the survivors with the average age of those who died when the Titanic sank.

# Independent two-group t-test: compares mean Age between the two Survived
# groups (0 and 1). The formula is written with Titanic$... terms, which is
# what the "data:" line of the printed result echoes.
t.test(Titanic$Age~Titanic$Survived)

    Welch Two Sample t-test

data:  Titanic$Age by Titanic$Survived
t = 2.1816, df = 667.56, p-value = 0.02949
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.1990628 3.7838912
sample estimates:
mean in group 0 mean in group 1 
       30.41530        28.42382 
# where Age is numeric and Survived is a two-valued (0/1) grouping variable