# This is a task for the Data Analytics Internship under Prof. Sameer Mathur.
# Load the Titanic passenger data from the current working directory.
# stringsAsFactors = TRUE is set explicitly: the summary recorded below reports
# Sex and Embarked as level counts (i.e. factors), but since R 4.0.0
# read.csv() keeps strings as character vectors unless asked otherwise.
titan.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)

# View() opens a data viewer window, which is only useful in an interactive
# session; skip it when the script is run in batch mode.
if (interactive()) {
  View(titan.df)
}

# Per-column summary statistics of the loaded data frame.
summary(titan.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
# Extract the first column of titan.df as a plain vector.
temp.df <- titan.df[[1]]

# Number of values in that column (one per row of the data frame).
length(temp.df)
## [1] 889

# Sum of the Survived column.
sum(titan.df[["Survived"]])
## [1] 340

# Share of each distinct value in temp.df, as a fraction of all rows.
prop.table(table(temp.df))
## temp.df
## 0 1
## 0.6175478 0.3824522

# The same shares expressed as percentages.
100 * prop.table(table(temp.df))
## temp.df
## 0 1
## 61.75478 38.24522
# Cross-tabulate Pclass (rows) against Survived (columns): raw counts.
table(titan.df[["Pclass"]], titan.df[["Survived"]])
##
## 0 1
## 1 80 134
## 2 97 87
## 3 372 119

# Same cross-tabulation as row percentages (margin = 1), so each Pclass row
# sums to 100.
100 * prop.table(
  table(titan.df[["Pclass"]], titan.df[["Survived"]]),
  margin = 1
)
##
## 0 1
## 1 37.38318 62.61682
## 2 52.71739 47.28261
## 3 75.76375 24.23625
# Three-way count table: Sex (rows) by Survived (columns), printed as one
# slice per Pclass value.
table(
  titan.df[["Sex"]],
  titan.df[["Survived"]],
  titan.df[["Pclass"]]
)
## , , = 1
##
##
## 0 1
## female 3 89
## male 77 45
##
## , , = 2
##
##
## 0 1
## female 6 70
## male 91 17
##
## , , = 3
##
##
## 0 1
## female 72 72
## male 300 47
# Row percentages of Survived (columns 0 / 1) within each Sex (rows);
# margin = 1 normalises by row, so each row sums to 100.
# Evaluated once: repeating the identical call only prints the same table
# a second time.
prop.table(table(titan.df$Sex, titan.df$Survived), margin = 1) * 100
##
## 0 1
## female 25.96154 74.03846
## male 81.10919 18.89081
# Contingency table of Survived (rows) by Sex (columns), built from a formula.
mytable <- xtabs(~ Survived + Sex, data = titan.df)

# Show the counts together with row and column totals.
addmargins(mytable)
## Sex
## Survived female male Sum
## 0 81 468 549
## 1 231 109 340
## Sum 312 577 889

# Pearson chi-squared test on the 2 x 2 table of Survived by Sex.
chisq.test(mytable)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16