# This is an analysis of the Titanic data:
# Load the Titanic passenger data into the data frame `titanic`.
# The CSV is addressed by its full path through file.path() instead of calling
# setwd(), so the session's working directory is not changed as a side effect.
# NOTE(review): the folder is still machine-specific; confirm where the data
# should live before sharing this script.
titanic <- read.csv(
  file.path("C:/Users/GOWRI/Desktop/iim_internship", "Titanic_Data.csv")
)
# View() opens a data viewer window, which is only useful when a person is
# driving the session; skip it when the script runs non-interactively.
if (interactive()) {
  View(titanic)
}
# task 3a: Use R to count the total number of passengers on board the Titanic.
# One row per passenger record, so the first dimension of the data frame is the count.
dim(titanic)[1]
## [1] 889
# Ans: 889
# task 3b: Use R to count the number of passengers who survived the sinking of the Titanic.
# Rows with Survived equal to 1 are counted as survivors; summing the logical
# comparison gives the number of TRUE values.
with(titanic, sum(Survived == 1))
## [1] 340
# Ans: 340
# task 3c: Use R to measure the percentage of passengers who survived the sinking of the Titanic.
# Survivor count scaled to a percentage of all rows in the data set.
Survived_percent <- with(
  titanic,
  sum(Survived == 1) * 100 / length(Survived)
)
Survived_percent
## [1] 38.24522
# Ans: 38.24522
# task 3d: Use R to count the number of first-class passengers who survived the sinking of the Titanic.
# Cross-tabulate passenger class (rows) against survival (columns); the answer
# is read from the cell for Pclass 1 and Survived 1.
firstClassPassengers <- xtabs(
  ~ titanic$Pclass + titanic$Survived
)
firstClassPassengers
## titanic$Survived
## titanic$Pclass 0 1
## 1 80 134
## 2 97 87
## 3 372 119
# Ans: 134
# task 3e: Use R to measure the percentage of first-class passengers who survived the sinking of the Titanic.
# The denominator has to be the number of first-class passengers, not everyone
# on board. margin = 1 normalises each Pclass row so it sums to 1; without a
# margin, prop.table() divides by the grand total and reports 134/889 instead
# of 134/214.
firstClassPassengersPercent <- prop.table(firstClassPassengers, margin = 1)
firstClassPassengersPercent
## titanic$Survived
## titanic$Pclass 0 1
## 1 0.3738318 0.6261682
## 2 0.5271739 0.4728261
## 3 0.7576375 0.2423625
# Ans: 0.6261682 (about 62.6% of first-class passengers survived)
# task 3f: Use R to count the number of females from First-Class who survived the sinking of the Titanic
# Three-way cross-tabulation: class by survival, printed as one slice per sex.
# The answer is the Pclass 1 / Survived 1 cell of the female slice.
firstClassfemalePassengers <- xtabs(
  ~ titanic$Pclass + titanic$Survived + titanic$Sex
)
firstClassfemalePassengers
## , , titanic$Sex = female
##
## titanic$Survived
## titanic$Pclass 0 1
## 1 3 89
## 2 6 70
## 3 72 72
##
## , , titanic$Sex = male
##
## titanic$Survived
## titanic$Pclass 0 1
## 1 77 45
## 2 91 17
## 3 300 47
# Ans: 89
# Express every class/survival/sex cell as a share of all passengers:
# with no margin, each count is divided by the grand total of the table.
firstClassfemalePassengersPercent <- prop.table(
  firstClassfemalePassengers,
  margin = NULL
)
firstClassfemalePassengersPercent
## , , titanic$Sex = female
##
## titanic$Survived
## titanic$Pclass 0 1
## 1 0.003374578 0.100112486
## 2 0.006749156 0.078740157
## 3 0.080989876 0.080989876
##
## , , titanic$Sex = male
##
## titanic$Survived
## titanic$Pclass 0 1
## 1 0.086614173 0.050618673
## 2 0.102362205 0.019122610
## 3 0.337457818 0.052868391
# task 3g and 3h
# Survival (rows) by sex (columns); both tasks below normalise this one table.
SurvivedFemalePassengers <- xtabs(
  ~ titanic$Survived + titanic$Sex
)
SurvivedFemalePassengers
## titanic$Sex
## titanic$Survived female male
## 0 81 468
## 1 231 109
# task 3g: Use R to measure the percentage of survivors who were female
# Normalise within each survival outcome (rows sum to 1), then read the
# female share in the Survived = 1 row.
FemaleSurvivedPassengersPercent <- prop.table(
  SurvivedFemalePassengers,
  margin = 1
)
FemaleSurvivedPassengersPercent
## titanic$Sex
## titanic$Survived female male
## 0 0.1475410 0.8524590
## 1 0.6794118 0.3205882
# Ans: 0.6794118
# task 3h: Use R to measure the percentage of females on board the Titanic who survived
# Normalise within each sex (columns sum to 1), then read the Survived = 1
# entry of the female column.
SurvivedFemalePassengersPercent <- prop.table(
  SurvivedFemalePassengers,
  margin = 2
)
SurvivedFemalePassengersPercent
## titanic$Sex
## titanic$Survived female male
## 0 0.2596154 0.8110919
## 1 0.7403846 0.1889081
# Ans: 0.7403846
# task 3i: Run a Pearson's Chi-squared test to test the following hypothesis:
# Hypothesis: The proportion of females onboard who survived the sinking of the Titanic was higher than the proportion of males onboard who survived the sinking of the Titanic.
# Build the 2 x 2 survival-by-sex contingency table that the test consumes.
tbl <- table(
  titanic$Survived,
  titanic$Sex
)
tbl
##
## female male
## 0 81 468
## 1 231 109
# correct = TRUE is the default; it is spelled out because the recorded result
# uses Yates' continuity correction.
chisq.test(tbl, correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tbl
## X-squared = 258.43, df = 1, p-value < 2.2e-16