# Load the Titanic passenger data.
# The CSV is addressed by its full path, so the session's working directory
# is left untouched (no setwd()).
data_dir <- "C:/Users/jaya/downloads"
# stringsAsFactors = TRUE keeps the text columns (Sex, Embarked) as factors on
# R >= 4.0, where read.csv() would otherwise return character columns; the
# per-level counts reported by summary() and str() depend on that.
titanic.df <- read.csv(
  file.path(data_dir, "Titanic Data.csv"),
  stringsAsFactors = TRUE
)
View(titanic.df)
# 2. Summary statistics for every column of the data set
summary(object = titanic.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
# Extended descriptive statistics (n, mean, sd, skew, kurtosis, ...) via psych
library(psych)
describe(x = titanic.df)
## vars n mean sd median trimmed mad min max range
## Survived 1 889 0.38 0.49 0.00 0.35 0.00 0.0 1.00 1.00
## Pclass 2 889 2.31 0.83 3.00 2.39 0.00 1.0 3.00 2.00
## Sex* 3 889 1.65 0.48 2.00 1.69 0.00 1.0 2.00 1.00
## Age 4 889 29.65 12.97 29.70 29.22 9.34 0.4 80.00 79.60
## SibSp 5 889 0.52 1.10 0.00 0.27 0.00 0.0 8.00 8.00
## Parch 6 889 0.38 0.81 0.00 0.19 0.00 0.0 6.00 6.00
## Fare 7 889 32.10 49.70 14.45 21.28 10.24 0.0 512.33 512.33
## Embarked* 8 889 2.54 0.79 3.00 2.67 0.00 1.0 3.00 2.00
## skew kurtosis se
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.27 0.03
## Sex* -0.62 -1.61 0.02
## Age 0.43 0.96 0.43
## SibSp 3.68 17.69 0.04
## Parch 2.74 9.66 0.03
## Fare 4.79 33.23 1.67
## Embarked* -1.26 -0.23 0.03
# Structure of the data frame: column types and the first few values.
# attach() is deliberately not used: the analysis refers to columns through
# titanic.df$... or data = titanic.df, and attaching would only place
# masking-prone copies of the columns on the search path.
str(titanic.df)
## 'data.frame': 889 obs. of 8 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ Age : num 22 38 26 35 35 29.7 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Embarked: Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...
# Survivors per sex: Survived is coded 0/1, so summing it within each sex
# counts the passengers who lived.
with(titanic.df, aggregate(Survived, by = list(Sex = Sex), FUN = sum))
## Sex x
## 1 female 231
## 2 male 109
# Total number of passengers (number of rows in the data frame)
nrow(titanic.df)
## [1] 889
# 3b. Passenger counts by outcome (0 = died, 1 = lived)
survivedTable <- table(titanic.df[["Survived"]])
View(survivedTable)
# 3c. Percentage of passengers who survived
prop.table(survivedTable) * 100 # shares of the total, scaled to percent
##
## 0 1
## 61.75478 38.24522
# 3c. Alternate solution: the mean of the 0/1 Survived column is the
# proportion of passengers who survived
summary(titanic.df[["Survived"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.3825 1.0000 1.0000
# 3d. How many 1st-class passengers survived?
# Cross-tabulate outcome (rows) against passenger class (columns).
surviversByClass <- xtabs(formula = ~ Survived + Pclass, data = titanic.df)
View(surviversByClass)
surviversByClass # counts per outcome and class
## Pclass
## Survived 1 2 3
## 0 80 97 372
## 1 134 87 119
# Same table with row and column totals appended
addmargins(A = surviversByClass)
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# 3e. Percentage of 1st-class passengers who survived
# margin = 2 normalises within each passenger class (the columns), so every
# column sums to 100 and the Survived = 1 row holds the survival rate per
# class. Normalising over both dimensions, c(1, 2), divides each cell by
# itself and yields a table of 1s.
survived1stclasstable <- xtabs(~ Survived + Pclass, data = titanic.df)
ftable(100 * prop.table(survived1stclasstable, margin = 2))
# From the counts with margins: 134 of the 214 1st-class passengers
# survived, i.e. about 62.6 %.
# 3f. How many 1st-class females survived?
# Three-way cross-tabulation: outcome x passenger class x sex.
surviversByClass <- xtabs(
  formula = ~ Survived + Pclass + Sex,
  data = titanic.df
)
View(surviversByClass)
surviversByClass # counts, one Survived x Pclass slice per sex
## , , Sex = female
##
## Pclass
## Survived 1 2 3
## 0 3 6 72
## 1 89 70 72
##
## , , Sex = male
##
## Pclass
## Survived 1 2 3
## 0 77 91 300
## 1 45 17 47
# Three-way table with totals appended along every dimension
addmargins(A = surviversByClass)
## , , Sex = female
##
## Pclass
## Survived 1 2 3 Sum
## 0 3 6 72 81
## 1 89 70 72 231
## Sum 92 76 144 312
##
## , , Sex = male
##
## Pclass
## Survived 1 2 3 Sum
## 0 77 91 300 468
## 1 45 17 47 109
## Sum 122 108 347 577
##
## , , Sex = Sum
##
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# 3g. Percentage of 1st-class female passengers who survived
# Normalise within each Pclass x Sex group (dimensions 2 and 3) so the two
# outcomes of every group add up to 100. Margins c(1, 2) would instead give
# the female/male split inside each Survived x Pclass cell, which answers a
# different question.
survived1stclasstable <- xtabs(~ Survived + Pclass + Sex, data = titanic.df)
# Rows are Sex x Pclass groups, columns are the outcomes, so each row reads
# as "died % / survived %" for that group.
ftable(
  100 * prop.table(survived1stclasstable, margin = c(2, 3)),
  row.vars = c("Sex", "Pclass")
)
# From the counts with margins: 89 of the 92 1st-class females survived,
# i.e. about 96.7 %.
# 3i. Chi-squared test on the one-way table of survival counts
# For a one-dimensional table chisq.test() performs a goodness-of-fit test
# against equal probabilities for the two outcomes.
# NOTE(review): if the question is whether survival depends on another
# variable (e.g. Sex or Pclass), a two-way table is needed here - confirm.
chisq.test(x = survivedTable)
##
## Chi-squared test for given probabilities
##
## data: survivedTable
## X-squared = 49.135, df = 1, p-value = 2.389e-12