# This is an analysis of the Titanic passenger data.

# Load the Titanic passenger data into the data frame `titanic`.
# The data directory is held in a variable and joined to the file name with
# file.path() instead of calling setwd(), so running this script no longer
# changes the working directory of the R session as a side effect.
data_dir <- "C:/Users/GOWRI/Desktop/iim_internship"
titanic <- read.csv(file.path(data_dir, "Titanic_Data.csv"))
# View() opens a data viewer window, which is only meaningful in an
# interactive session; skip it when the script is run non-interactively.
if (interactive()) {
  View(titanic)
}
# Task 3a: count the total number of passengers on board the Titanic.
# The first element of dim() is the number of rows of the data frame.
dim(titanic)[1L]
## [1] 889
# Ans: 889
# Task 3b: count the passengers who survived the sinking of the Titanic.
# Summing the logical vector counts the rows where Survived equals 1.
sum(titanic[["Survived"]] == 1)
## [1] 340
# Ans: 340
# Task 3c: percentage of passengers who survived the sinking of the Titanic.
# Survivor count (rows with Survived == 1) scaled to 100 and divided by the
# total number of rows.
Survived_percent <- 100 * sum(titanic[["Survived"]] == 1) / nrow(titanic)
Survived_percent
## [1] 38.24522
# Ans: 38.24522
# Task 3d: Use R to count the number of first-class passengers who survived the sinking of the Titanic.
# Cross-tabulate passenger class (rows) against survival (columns); the answer
# is the cell for Pclass = 1, Survived = 1.
firstClassPassengers <- xtabs(~ titanic$Pclass + titanic$Survived)
firstClassPassengers
##               titanic$Survived
## titanic$Pclass   0   1
##              1  80 134
##              2  97  87
##              3 372 119
# Ans: 134
# Task 3e: Use R to measure the percentage of first-class passengers who survived the sinking of the Titanic.
# margin = 1 normalises each row, so every cell is a share of that passenger
# class. Without a margin, prop.table() divides by the grand total, which gives
# the share of ALL passengers who were first-class survivors (134 / 889 = 0.1507)
# rather than the survival rate within first class (134 / 214).
firstClassPassengersPercent <- prop.table(firstClassPassengers, margin = 1)
firstClassPassengersPercent
##               titanic$Survived
## titanic$Pclass         0         1
##              1 0.3738318 0.6261682
##              2 0.5271739 0.4728261
##              3 0.7576375 0.2423625
# Ans: 0.6261682 (i.e. 62.62% of first-class passengers survived)
# Task 3f: count the first-class female passengers who survived the sinking.
# Three-way contingency table: class (rows) x survival (columns), printed as
# one slice per sex. The answer is the female slice, Pclass = 1, Survived = 1.
firstClassfemalePassengers <- xtabs(
  ~ titanic$Pclass + titanic$Survived + titanic$Sex
)
firstClassfemalePassengers
## , , titanic$Sex = female
## 
##               titanic$Survived
## titanic$Pclass   0   1
##              1   3  89
##              2   6  70
##              3  72  72
## 
## , , titanic$Sex = male
## 
##               titanic$Survived
## titanic$Pclass   0   1
##              1  77  45
##              2  91  17
##              3 300  47
# Ans: 89

# Express every cell of the class x survival x sex table as a share of the
# table's grand total (each value is count / all passengers in the table).
# NOTE(review): no task label is attached to this step; if a within-group
# percentage was intended, a margin would be needed - confirm.
firstClassfemalePassengersPercent <-
  firstClassfemalePassengers / sum(firstClassfemalePassengers)
firstClassfemalePassengersPercent
## , , titanic$Sex = female
## 
##               titanic$Survived
## titanic$Pclass           0           1
##              1 0.003374578 0.100112486
##              2 0.006749156 0.078740157
##              3 0.080989876 0.080989876
## 
## , , titanic$Sex = male
## 
##               titanic$Survived
## titanic$Pclass           0           1
##              1 0.086614173 0.050618673
##              2 0.102362205 0.019122610
##              3 0.337457818 0.052868391
# Tasks 3g and 3h: both are answered from one survival-by-sex contingency
# table (rows: Survived, columns: Sex).
SurvivedFemalePassengers <- xtabs(~ titanic$Survived + titanic$Sex)
SurvivedFemalePassengers
##                 titanic$Sex
## titanic$Survived female male
##                0     81  468
##                1    231  109
# Task 3g: measure the percentage of survivors who were female.
# margin = 1 makes each row (survival status) sum to 1, so the Survived = 1 row
# gives the split of survivors by sex.
FemaleSurvivedPassengersPercent <- prop.table(
  SurvivedFemalePassengers,
  margin = 1
)
FemaleSurvivedPassengersPercent
##                 titanic$Sex
## titanic$Survived    female      male
##                0 0.1475410 0.8524590
##                1 0.6794118 0.3205882
# Ans: 0.6794118

# Task 3h: measure the percentage of females on board who survived.
# margin = 2 makes each column (sex) sum to 1, so the female column gives the
# split of female passengers by survival status.
SurvivedFemalePassengersPercent <- prop.table(
  SurvivedFemalePassengers,
  margin = 2
)
SurvivedFemalePassengersPercent
##                 titanic$Sex
## titanic$Survived    female      male
##                0 0.2596154 0.8110919
##                1 0.7403846 0.1889081
# Ans: 0.7403846
# Task 3i: run a Pearson's Chi-squared test for the following hypothesis.
#
# Hypothesis: the proportion of females on board who survived the sinking of
# the Titanic was higher than the proportion of males on board who survived.
#
# Build the 2 x 2 table of survival (rows) by sex (columns) and test it.
tbl <- table(titanic$Survived, titanic$Sex)
tbl
##    
##     female male
##   0     81  468
##   1    231  109
# For a 2 x 2 table chisq.test() applies Yates' continuity correction, as the
# header of the output below shows.
chisq.test(tbl)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tbl
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# Reading the result: the p-value is far below 0.05, so survival and sex are
# not independent. The test itself is not directional; the direction comes
# from the table, where 231 of 312 females survived versus 109 of 577 males.