R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the Titanic passenger records. The path is a single literal, so no
# paste() is needed. stringsAsFactors = TRUE keeps Sex and Embarked as factors
# on R >= 4.0, where read.csv() stopped converting strings by default.
mydata <- read.csv("Titanic.csv.csv", stringsAsFactors = TRUE)
# Preview the first rows to confirm the columns were parsed as expected.
head(mydata)
##   Survived Pclass    Sex  Age SibSp Parch    Fare Embarked
## 1        0      3   male 22.0     1     0  7.2500        S
## 2        1      1 female 38.0     1     0 71.2833        C
## 3        1      3 female 26.0     0     0  7.9250        S
## 4        1      1 female 35.0     1     0 53.1000        S
## 5        0      3   male 35.0     0     0  8.0500        S
## 6        0      3   male 29.7     0     0  8.4583        Q
library(car)
## Warning: package 'car' was built under R version 3.4.3
# Show each column's type alongside a sample of its values.
str(mydata)
## 'data.frame':    889 obs. of  8 variables:
##  $ Survived: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass  : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : num  22 38 26 35 35 29.7 54 2 27 14 ...
##  $ SibSp   : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch   : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Embarked: Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...
# attach(mydata) was dropped: nothing in this document reads a bare column
# name from the search path (columns are reached through mydata$...,
# data = mydata, or subset(mydata, ...)), and an attached copy would not
# reflect later changes made to mydata.
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
# Per-column descriptive statistics from psych; the namespace prefix makes the
# source of describe() explicit.
psych::describe(mydata)
##           vars   n  mean    sd median trimmed   mad min    max  range
## Survived     1 889  0.38  0.49   0.00    0.35  0.00 0.0   1.00   1.00
## Pclass       2 889  2.31  0.83   3.00    2.39  0.00 1.0   3.00   2.00
## Sex*         3 889  1.65  0.48   2.00    1.69  0.00 1.0   2.00   1.00
## Age          4 889 29.65 12.97  29.70   29.22  9.34 0.4  80.00  79.60
## SibSp        5 889  0.52  1.10   0.00    0.27  0.00 0.0   8.00   8.00
## Parch        6 889  0.38  0.81   0.00    0.19  0.00 0.0   6.00   6.00
## Fare         7 889 32.10 49.70  14.45   21.28 10.24 0.0 512.33 512.33
## Embarked*    8 889  2.54  0.79   3.00    2.67  0.00 1.0   3.00   2.00
##            skew kurtosis   se
## Survived   0.48    -1.77 0.02
## Pclass    -0.63    -1.27 0.03
## Sex*      -0.62    -1.61 0.02
## Age        0.43     0.96 0.43
## SibSp      3.68    17.69 0.04
## Parch      2.74     9.66 0.03
## Fare       4.79    33.23 1.67
## Embarked* -1.26    -0.23 0.03
# Rows and columns in the data set.
dim(mydata)
## [1] 889   8
# Total number of passengers, i.e. the row count.
nrow(mydata)
## [1] 889
# Passengers who survived the sinking (rows with a missing flag are ignored).
sum(mydata$Survived == 1, na.rm = TRUE)
## [1] 340
# One-way contingency table of survival outcomes.
# Survived is recoded as a factor so it is tabulated as a category.
mydata[["Survived"]] <- as.factor(mydata[["Survived"]])
SurvivedTable <- table(mydata[["Survived"]])
print(SurvivedTable)
## 
##   0   1 
## 549 340
# Proportion of passengers who died (0) and who survived (1).
prop <- SurvivedTable / sum(SurvivedTable)
# The same shares expressed as percentages.
propPer <- 100 * prop
print(propPer)
## 
##        0        1 
## 61.75478 38.24522
# Percentage of passengers who survived, looked up by the level label "1".
propPer["1"]
##        1 
## 38.24522
# Cross-tabulate survival against passenger class.
mytab <- xtabs(formula = ~ Survived + Pclass, data = mydata)
# Append row and column totals to the counts.
addmargins(mytab)
##         Pclass
## Survived   1   2   3 Sum
##      0    80  97 372 549
##      1   134  87 119 340
##      Sum 214 184 491 889
# First-class passengers who survived.
with(mydata, sum(Survived == 1 & Pclass == 1, na.rm = TRUE))
## [1] 134
# Survival counts by class, then each class column scaled to sum to 1.
surviversByClass <- xtabs(formula = ~ Survived + Pclass, data = mydata)
prop.table(surviversByClass, margin = 2)
##         Pclass
## Survived         1         2         3
##        0 0.3738318 0.5271739 0.7576375
##        1 0.6261682 0.4728261 0.2423625
# Percentage of first-class passengers who survived: row "1" (survived),
# column "1" (first class) of the per-class proportions.
100 * prop.table(surviversByClass, margin = 2)["1", "1"]
## [1] 62.61682
# Three-way table: survival by sex within each passenger class.
mytable1 <- xtabs(formula = ~ Survived + Sex + Pclass, data = mydata)
# Add totals along every dimension.
addmargins(mytable1)
## , , Pclass = 1
## 
##         Sex
## Survived female male Sum
##      0        3   77  80
##      1       89   45 134
##      Sum     92  122 214
## 
## , , Pclass = 2
## 
##         Sex
## Survived female male Sum
##      0        6   91  97
##      1       70   17  87
##      Sum     76  108 184
## 
## , , Pclass = 3
## 
##         Sex
## Survived female male Sum
##      0       72  300 372
##      1       72   47 119
##      Sum    144  347 491
## 
## , , Pclass = Sum
## 
##         Sex
## Survived female male Sum
##      0       81  468 549
##      1      231  109 340
##      Sum    312  577 889
# Flatten the three-way table: Survived and Sex down the rows, Pclass across.
ftable(mytable1, row.vars = c("Survived", "Sex"))
##                 Pclass   1   2   3
## Survived Sex                      
## 0        female          3   6  72
##          male           77  91 300
## 1        female         89  70  72
##          male           45  17  47
# Each cell of the three-way table as a share of all passengers, flattened.
ftab <- ftable(prop.table(mytable1))
# Convert the shares to percentages and show them to two decimals.
ftabPer <- 100 * ftab
round(ftabPer, digits = 2)
##                 Pclass     1     2     3
## Survived Sex                            
## 0        female         0.34  0.67  8.10
##          male           8.66 10.24 33.75
## 1        female        10.01  7.87  8.10
##          male           5.06  1.91  5.29
# Surviving first-class women, looked up by label rather than by position in
# the flattened table.
mytable1["1", "female", "1"]
## [1] 89
# Survival counts split by sex.
SurvivedBySex <- xtabs(formula = ~ Survived + Sex, data = mydata)
print(SurvivedBySex)
##         Sex
## Survived female male
##        0     81  468
##        1    231  109
# Row proportions: within each survival outcome, the split between the sexes.
propSur <- prop.table(SurvivedBySex, margin = 1)
propSurper <- 100 * propSur
# Percentage of survivors who were female.
propSurper["1", "female"]
## [1] 67.94118
# Chi-squared test of independence between survival and sex. The result is
# stored once so the p-value can be read from it without rerunning the test.
sexSurvivalTest <- chisq.test(SurvivedBySex)
sexSurvivalTest
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  SurvivedBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# Percentage of female survivors was higher than that of male survivors;
# the exact p-value is taken from the stored test result.
sexSurvivalTest$p.value
## [1] 3.77991e-58

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.