R Markdown

This is an R Markdown document based on the Titanic Case Study.

```r

# psych supplies describe(); load it from the default library paths instead of
# a machine-specific Windows / R 3.4 directory so the script runs elsewhere.
library(psych)

# Read the passenger data from the working directory.
# stringsAsFactors = TRUE keeps Sex and Embarked as factors on R >= 4.0 as
# well, which the per-level counts printed by summary() below depend on.
titanic.df <- read.csv("Titanic Data.csv", stringsAsFactors = TRUE)

# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(titanic.df)
}

# Descriptive statistics for every column.
describe(titanic.df)
##           vars   n  mean    sd median trimmed   mad min    max  range
## Survived     1 889  0.38  0.49   0.00    0.35  0.00 0.0   1.00   1.00
## Pclass       2 889  2.31  0.83   3.00    2.39  0.00 1.0   3.00   2.00
## Sex*         3 889  1.65  0.48   2.00    1.69  0.00 1.0   2.00   1.00
## Age          4 889 29.65 12.97  29.70   29.22  9.34 0.4  80.00  79.60
## SibSp        5 889  0.52  1.10   0.00    0.27  0.00 0.0   8.00   8.00
## Parch        6 889  0.38  0.81   0.00    0.19  0.00 0.0   6.00   6.00
## Fare         7 889 32.10 49.70  14.45   21.28 10.24 0.0 512.33 512.33
## Embarked*    8 889  2.54  0.79   3.00    2.67  0.00 1.0   3.00   2.00
##            skew kurtosis   se
## Survived   0.48    -1.77 0.02
## Pclass    -0.63    -1.27 0.03
## Sex*      -0.62    -1.61 0.02
## Age        0.43     0.96 0.43
## SibSp      3.68    17.69 0.04
## Parch      2.74     9.66 0.03
## Fare       4.79    33.23 1.67
## Embarked* -1.26    -0.23 0.03
 # Per-column summaries (quartiles for numeric columns, counts for factors).
 summary(titanic.df)
##     Survived          Pclass          Sex           Age       
##  Min.   :0.0000   Min.   :1.000   female:312   Min.   : 0.40  
##  1st Qu.:0.0000   1st Qu.:2.000   male  :577   1st Qu.:22.00  
##  Median :0.0000   Median :3.000                Median :29.70  
##  Mean   :0.3825   Mean   :2.312                Mean   :29.65  
##  3rd Qu.:1.0000   3rd Qu.:3.000                3rd Qu.:35.00  
##  Max.   :1.0000   Max.   :3.000                Max.   :80.00  
##      SibSp            Parch             Fare         Embarked
##  Min.   :0.0000   Min.   :0.0000   Min.   :  0.000   C:168   
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:  7.896   Q: 77   
##  Median :0.0000   Median :0.0000   Median : 14.454   S:644   
##  Mean   :0.5242   Mean   :0.3825   Mean   : 32.097           
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.: 31.000           
##  Max.   :8.0000   Max.   :6.0000   Max.   :512.329
 # Rows x columns of the data frame.
 dim(titanic.df)
## [1] 889   8
 # The headline figures below are derived from the data rather than typed in,
 # so they cannot drift from the tables they summarise.
 paste0(
   "total number of passengers on board the Titanic=",
   nrow(titanic.df)
 )
## [1] "total number of passengers on board the Titanic=889"
 # Survived is coded 0/1; the count under 1 is the number of survivors.
 table(titanic.df$Survived)
## 
##   0   1 
## 549 340
 paste0(
   "number of passengers who survived the sinking of the Titanic=",
   sum(titanic.df$Survived == 1)
 )
## [1] "number of passengers who survived the sinking of the Titanic=340"
 prop.table(table(titanic.df$Survived))*100
## 
##        0        1 
## 61.75478 38.24522
 # sprintf() rounds to two decimals (38.24522 -> 38.25) instead of truncating.
 sprintf(
   "percentage of passengers who survived the sinking of the Titanic=%.2f%%",
   100 * mean(titanic.df$Survived == 1)
 )
## [1] "percentage of passengers who survived the sinking of the Titanic=38.25%"
 # Two-way contingency table: survival outcome (rows) by passenger class (cols).
 mytable <- xtabs(~ Survived+Pclass, data = titanic.df)
 mytable # frequencies
##         Pclass
## Survived   1   2   3
##        0  80  97 372
##        1 134  87 119
 # Cell [Survived = "1", Pclass = "1"] is read from the table, not typed in.
 paste0(
   " the number of first-class passengers who survived the sinking of the Titanic=",
   mytable["1", "1"]
 )
## [1] " the number of first-class passengers who survived the sinking of the Titanic=134"
 # Cell proportions over the whole table, first as fractions then as percent.
 prop.table(mytable)
##         Pclass
## Survived          1          2          3
##        0 0.08998875 0.10911136 0.41844769
##        1 0.15073116 0.09786277 0.13385827
 prop.table(mytable)*100
##         Pclass
## Survived         1         2         3
##        0  8.998875 10.911136 41.844769
##        1 15.073116  9.786277 13.385827
 prop.table(mytable, 2) # column proportions
##         Pclass
## Survived         1         2         3
##        0 0.3738318 0.5271739 0.7576375
##        1 0.6261682 0.4728261 0.2423625
 # Share of first-class passengers who survived, taken from the column
 # proportions; sprintf() rounds 62.61682 to 62.62 instead of truncating.
 sprintf(
   " the percentage of first-class passengers who survived the sinking of the Titanic=%.2f%%",
   100 * prop.table(mytable, 2)["1", "1"]
 )
## [1] " the percentage of first-class passengers who survived the sinking of the Titanic=62.62%"
 # Three-way contingency table: survival by passenger class, split by sex.
 mytable <- xtabs(~ Survived+Pclass+Sex, data = titanic.df)
 mytable # frequencies
## , , Sex = female
## 
##         Pclass
## Survived   1   2   3
##        0   3   6  72
##        1  89  70  72
## 
## , , Sex = male
## 
##         Pclass
## Survived   1   2   3
##        0  77  91 300
##        1  45  17  47
 # Read the first-class female survivors straight from the three-way table
 # (Survived = "1", Pclass = "1", Sex = "female") while it is still in scope;
 # the two-way table built next no longer carries the class dimension.
 paste0(
   " the number of females from First-Class who survived the sinking of the Titanic=",
   mytable["1", "1", "female"]
 )
## [1] " the number of females from First-Class who survived the sinking of the Titanic=89"
 # Collapse over class: survival by sex only. mytable is left holding this
 # table for the proportion calculations that follow.
 mytable <- xtabs(~ Survived+Sex, data = titanic.df)
 mytable
##         Sex
## Survived female male
##        0     81  468
##        1    231  109
 # Row proportions: within each survival outcome, the split between sexes.
 prop.table(mytable, 1)
##         Sex
## Survived    female      male
##        0 0.1475410 0.8524590
##        1 0.6794118 0.3205882
 # Share of survivors who were female, read from the row proportions.
 sprintf(
   "the percentage of survivors who were female=%.2f%%",
   100 * prop.table(mytable, 1)["1", "female"]
 )
## [1] "the percentage of survivors who were female=67.94%"
 # Column proportions: within each sex, the split between survival outcomes.
 prop.table(mytable, 2)
##         Sex
## Survived    female      male
##        0 0.2596154 0.8110919
##        1 0.7403846 0.1889081
 # Share of females who survived, read from the column proportions;
 # sprintf() rounds 74.03846 to 74.04 instead of truncating.
 sprintf(
   "the percentage of females on board the Titanic who survived=%.2f%%",
   100 * prop.table(mytable, 2)["1", "female"]
 )
## [1] "the percentage of females on board the Titanic who survived=74.04%"
 # Rebuild the survival-by-sex table so this test does not depend on whatever
 # mytable held before, and show it with row/column totals.
 mytable <- xtabs(~ Survived+Sex, data = titanic.df)
 addmargins(mytable)
##         Sex
## Survived female male Sum
##      0       81  468 549
##      1      231  109 340
##      Sum    312  577 889
 # Test of independence between survival and sex on the 2x2 table.
 chisq.test(mytable)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable
## X-squared = 258.43, df = 1, p-value < 2.2e-16
 # Conclusion drawn from the p-value reported above.
 ("Since the p-value is very small (< 2.2e-16), we reject the null hypothesis that the sex of a passenger and the survival of a passenger are independent of each other. There is a statistically significant difference between the proportion of females onboard who survived the sinking of the Titanic and the proportion of males onboard who survived the sinking of the Titanic")
## [1] "Since the p-value is very small (< 2.2e-16), we reject the null hypothesis that the sex of a passenger and the survival of a passenger are independent of each other. There is a statistically significant difference between the proportion of females onboard who survived the sinking of the Titanic and the proportion of males onboard who survived the sinking of the Titanic"

```