Task 2a - Review the Titanic data file

Task 2b - Reading the Titanic Data file into R

# Make the folder that holds the data file the working directory, so that the
# relative file name passed to read.csv() below resolves.
# NOTE(review): this is an absolute, machine-specific path and will fail on
# any other computer; consider running from the project folder instead.
setwd("C:/Users/Parul Verma/Desktop/Data Analytics Internship")

Using the read.csv function to read the data and storing it in a data frame called titanic.df

# Read the passenger records from the CSV file in the working directory into
# the data frame titanic.df. The file name is passed directly: wrapping a
# single string in paste(..., sep = "") returns it unchanged.
titanic.df <- read.csv("Titanic Data.csv")

Using the View function to view the data in R

# Open the data frame in the spreadsheet-style viewer for a visual check.
# View() relies on a GUI viewer, so it is only called in an interactive
# session; batch runs (e.g. Rscript) then do not depend on one being available.
if (interactive()) {
  View(titanic.df)
}

Task 3a - To count the total number of passengers on board the Titanic

# Number of rows and columns in the data set; the row count (first value) is
# what the write-up reports as the number of passengers.
c(nrow(titanic.df), ncol(titanic.df))
## [1] 889   8

Total number of passengers on board the Titanic = 889

Task 3b - To count the number of passengers who survived the sinking of the Titanic

# Frequency of each value in the Survived column (the write-up reads 1 as
# "survived").
table(titanic.df[["Survived"]])
## 
##   0   1 
## 549 340

The number of passengers who survived the sinking of the Titanic (1 = Survived) = 340

Task 3c - To measure the percentage of passengers who survived the sinking of the Titanic

# One-way frequency table of Survived, then each count expressed as a
# percentage of all passengers.
mytable <- table(Survived = titanic.df$Survived)
100 * prop.table(mytable)
## Survived
##        0        1 
## 61.75478 38.24522

Percentage of passengers who survived the sinking of the Titanic = 38.24522%

Task 3d - To count the number of first-class passengers who survived the sinking of the Titanic

# Cross-tabulate survival status (rows) against passenger class (columns)
# and display the counts.
mytable <- xtabs(~ Survived + Pclass, data = titanic.df)
print(mytable)
##         Pclass
## Survived   1   2   3
##        0  80  97 372
##        1 134  87 119

Number of first-class passengers who survived the sinking of the Titanic = 134

Task 3e - To measure the percentage of first-class passengers who survived the sinking of the Titanic

# Column percentages of the Survived x Pclass table: within each passenger
# class, the share that did / did not survive (each column sums to 100).
100 * prop.table(mytable, margin = 2)
##         Pclass
## Survived        1        2        3
##        0 37.38318 52.71739 75.76375
##        1 62.61682 47.28261 24.23625

Percentage of first-class passengers who survived the sinking of the Titanic = 62.61682%

Task 3f - To count the number of females from First-Class who survived the sinking of the Titanic

# Three-way table: survival status by passenger class, printed as one
# Survived x Pclass layer per value of Sex.
mytable <- xtabs(~ Survived + Pclass + Sex, data = titanic.df)
print(mytable)
## , , Sex = female
## 
##         Pclass
## Survived   1   2   3
##        0   3   6  72
##        1  89  70  72
## 
## , , Sex = male
## 
##         Pclass
## Survived   1   2   3
##        0  77  91 300
##        1  45  17  47

Number of females from First-Class who survived the sinking of the Titanic = 89

Task 3g - To measure the percentage of survivors who were female

# Survival status (rows) by sex (columns), shown as row percentages: within
# each survival outcome, the split between female and male.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
100 * prop.table(mytable, margin = 1)
##         Sex
## Survived   female     male
##        0 14.75410 85.24590
##        1 67.94118 32.05882

Percentage of survivors who were female = 67.94118%

Task 3h - To measure the percentage of females on board the Titanic who survived

# Survival status (rows) by sex (columns), shown as column percentages:
# within each sex, the share that did / did not survive.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
100 * prop.table(mytable, margin = 2)
##         Sex
## Survived   female     male
##        0 25.96154 81.10919
##        1 74.03846 18.89081

Percentage of females on board the Titanic who survived = 74.03846%

Task 3i - Run a Pearson’s Chi-squared test to test the following hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.

# Survived x Sex contingency table used for the test below, displayed with
# row and column totals added.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
print(addmargins(mytable))
##         Sex
## Survived female male Sum
##      0       81  468 549
##      1      231  109 340
##      Sum    312  577 889
# Pearson's chi-squared test of independence between survival and sex on the
# 2 x 2 table; the continuity correction is the default, stated explicitly.
chisq.test(mytable, correct = TRUE)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16

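As the p-value (< 2.2e-16) is far below 0.05, we reject the null hypothesis that survival was independent of sex. Together with the observed survival rates (74.04% of females versus 18.89% of males), the data support our hypothesis that a higher proportion of females than males survived.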