This is an R Markdown document based on the Titanic Case Study.

```r
# Load the Titanic passenger data. stringsAsFactors = TRUE keeps the text
# columns as factors on R >= 4.0 (where the default became FALSE); the
# per-level counts that summary() reports for Sex and Embarked need factors.
titanic.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually at the console.
if (interactive()) {
  View(titanic.df)
}

# Load psych from the default library search path rather than a
# machine-specific directory, then print its descriptive statistics.
library(psych)
describe(titanic.df)
## vars n mean sd median trimmed mad min max range
## Survived 1 889 0.38 0.49 0.00 0.35 0.00 0.0 1.00 1.00
## Pclass 2 889 2.31 0.83 3.00 2.39 0.00 1.0 3.00 2.00
## Sex* 3 889 1.65 0.48 2.00 1.69 0.00 1.0 2.00 1.00
## Age 4 889 29.65 12.97 29.70 29.22 9.34 0.4 80.00 79.60
## SibSp 5 889 0.52 1.10 0.00 0.27 0.00 0.0 8.00 8.00
## Parch 6 889 0.38 0.81 0.00 0.19 0.00 0.0 6.00 6.00
## Fare 7 889 32.10 49.70 14.45 21.28 10.24 0.0 512.33 512.33
## Embarked* 8 889 2.54 0.79 3.00 2.67 0.00 1.0 3.00 2.00
## skew kurtosis se
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.27 0.03
## Sex* -0.62 -1.61 0.02
## Age 0.43 0.96 0.43
## SibSp 3.68 17.69 0.04
## Parch 2.74 9.66 0.03
## Fare 4.79 33.23 1.67
## Embarked* -1.26 -0.23 0.03
# Per-column summary statistics (quartiles for numeric columns, level counts
# for factor columns).
summary(titanic.df)
## Survived Pclass Sex Age
## Min. :0.0000 Min. :1.000 female:312 Min. : 0.40
## 1st Qu.:0.0000 1st Qu.:2.000 male :577 1st Qu.:22.00
## Median :0.0000 Median :3.000 Median :29.70
## Mean :0.3825 Mean :2.312 Mean :29.65
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:35.00
## Max. :1.0000 Max. :3.000 Max. :80.00
## SibSp Parch Fare Embarked
## Min. :0.0000 Min. :0.0000 Min. : 0.000 C:168
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 7.896 Q: 77
## Median :0.0000 Median :0.0000 Median : 14.454 S:644
## Mean :0.5242 Mean :0.3825 Mean : 32.097
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 31.000
## Max. :8.0000 Max. :6.0000 Max. :512.329
# Dimensions of the data set: one row per passenger, one column per variable.
dim(titanic.df)
## [1] 889 8
# Report the passenger count from the data itself so the statement cannot
# drift from the table it describes.
paste0("total number of passengers on board the Titanic=", nrow(titanic.df))
## [1] "total number of passengers on board the Titanic=889"
# Tabulate the survival flag (0 = did not survive, 1 = survived).
survival_counts <- table(titanic.df$Survived)
survival_counts
##
## 0 1
## 549 340
# Count survivors directly from the column instead of hard-coding the number.
paste0(
  "number of passengers who survived the sinking of the Titanic=",
  sum(titanic.df$Survived == 1)
)
## [1] "number of passengers who survived the sinking of the Titanic=340"
# The same table expressed as percentages of all passengers.
prop.table(survival_counts) * 100
##
## 0 1
## 61.75478 38.24522
# sprintf() rounds to two decimals (38.24522 -> 38.25) rather than truncating.
sprintf(
  "percentage of passengers who survived the sinking of the Titanic=%.2f%%",
  100 * mean(titanic.df$Survived == 1)
)
## [1] "percentage of passengers who survived the sinking of the Titanic=38.25%"
# Cross-tabulate survival (rows) against passenger class (columns).
mytable <- xtabs(~ Survived + Pclass, data = titanic.df)
mytable # frequencies
## Pclass
## Survived 1 2 3
## 0 80 97 372
## 1 134 87 119
# Read the first-class survivor count from the table (Survived = 1,
# Pclass = 1) instead of hard-coding it.
paste0(
  " the number of first-class passengers who survived the sinking of the Titanic=",
  mytable["1", "1"]
)
## [1] " the number of first-class passengers who survived the sinking of the Titanic=134"
prop.table(mytable) # cell proportions of all passengers
## Pclass
## Survived 1 2 3
## 0 0.08998875 0.10911136 0.41844769
## 1 0.15073116 0.09786277 0.13385827
prop.table(mytable) * 100 # cell percentages of all passengers
## Pclass
## Survived 1 2 3
## 0 8.998875 10.911136 41.844769
## 1 15.073116 9.786277 13.385827
class_props <- prop.table(mytable, 2) # column proportions
class_props
## Pclass
## Survived 1 2 3
## 0 0.3738318 0.5271739 0.7576375
## 1 0.6261682 0.4728261 0.2423625
# Share of first-class passengers who survived; sprintf() rounds to two
# decimals (62.61682 -> 62.62) rather than truncating.
sprintf(
  " the percentage of first-class passengers who survived the sinking of the Titanic=%.2f%%",
  100 * class_props["1", "1"]
)
## [1] " the percentage of first-class passengers who survived the sinking of the Titanic=62.62%"
# Three-way table: survival by passenger class, one slice per sex.
mytable <- xtabs(~ Survived + Pclass + Sex, data = titanic.df)
mytable # frequencies
## , , Sex = female
##
## Pclass
## Survived 1 2 3
## 0 3 6 72
## 1 89 70 72
##
## , , Sex = male
##
## Pclass
## Survived 1 2 3
## 0 77 91 300
## 1 45 17 47
# Read the first-class female survivor count while mytable still holds the
# three-way table (Survived = 1, Pclass = 1, Sex = female); the two-way table
# built next does not contain this figure.
paste0(
  " the number of females from First-Class who survived the sinking of the Titanic=",
  mytable["1", "1", "female"]
)
## [1] " the number of females from First-Class who survived the sinking of the Titanic=89"
# Two-way table of survival by sex, used for the proportions that follow.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
mytable
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# mytable holds the Survived x Sex counts at this point.
# Row proportions: within each survival outcome, the share of each sex.
survivor_props <- prop.table(mytable, 1)
survivor_props
## Sex
## Survived female male
## 0 0.1475410 0.8524590
## 1 0.6794118 0.3205882
# Share of survivors who were female, formatted from the table.
sprintf(
  "the percentage of survivors who were female=%.2f%%",
  100 * survivor_props["1", "female"]
)
## [1] "the percentage of survivors who were female=67.94%"
# Column proportions: within each sex, the share of each survival outcome.
sex_props <- prop.table(mytable, 2)
sex_props
## Sex
## Survived female male
## 0 0.2596154 0.8110919
## 1 0.7403846 0.1889081
# Share of females who survived; sprintf() rounds to two decimals
# (74.03846 -> 74.04) rather than truncating.
sprintf(
  "the percentage of females on board the Titanic who survived=%.2f%%",
  100 * sex_props["1", "female"]
)
## [1] "the percentage of females on board the Titanic who survived=74.04%"
# Survival by sex, shown with row and column totals.
mytable <- xtabs(~ Survived + Sex, data = titanic.df)
addmargins(mytable)
## Sex
## Survived female male Sum
## 0 81 468 549
## 1 231 109 340
## Sum 312 577 889
# Chi-squared test of independence between sex and survival. The result is
# kept so the written conclusion below is derived from it.
sex_test <- chisq.test(mytable)
sex_test
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# State the conclusion the test supports.
# NOTE(review): 0.05 is the conventional significance level; the original
# text only said the p-value was small - confirm the intended threshold.
alpha <- 0.05
if (sex_test$p.value < alpha) {
  paste0(
    "Since the p-value is below ", alpha, ", we reject the null hypothesis ",
    "that the sex of a passenger and the survival of a passenger are ",
    "independent of each other. There is a statistically significant ",
    "difference between the proportion of females onboard who survived the ",
    "sinking of the Titanic and the proportion of males onboard who survived ",
    "the sinking of the Titanic"
  )
} else {
  paste0(
    "Since the p-value is not below ", alpha, ", we fail to reject the null ",
    "hypothesis that the sex of a passenger and the survival of a passenger ",
    "are independent of each other"
  )
}
## [1] "Since the p-value is below 0.05, we reject the null hypothesis that the sex of a passenger and the survival of a passenger are independent of each other. There is a statistically significant difference between the proportion of females onboard who survived the sinking of the Titanic and the proportion of males onboard who survived the sinking of the Titanic"
```