This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the Titanic passenger data from the working directory.
# stringsAsFactors = TRUE keeps Sex and Embarked as factors on every R version
# (the read.csv default changed to FALSE in R 4.0.0), which is what the str()
# output recorded in this document shows.
mydata <- read.csv("Titanic.csv.csv", stringsAsFactors = TRUE)
# Preview the first six rows.
head(mydata)
## Survived Pclass Sex Age SibSp Parch Fare Embarked
## 1 0 3 male 22.0 1 0 7.2500 S
## 2 1 1 female 38.0 1 0 71.2833 C
## 3 1 3 female 26.0 0 0 7.9250 S
## 4 1 1 female 35.0 1 0 53.1000 S
## 5 0 3 male 35.0 0 0 8.0500 S
## 6 0 3 male 29.7 0 0 8.4583 Q
library(car)
## Warning: package 'car' was built under R version 3.4.3
# Compact overview of the data frame: each column's type and first values.
str(mydata)
## 'data.frame': 889 obs. of 8 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ Age : num 22 38 26 35 35 29.7 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Embarked: Factor w/ 3 levels "C","Q","S": 3 1 3 3 3 2 3 3 3 1 ...
# NOTE(review): attach() places a copy of mydata's columns on the search path.
# The code visible in this document always goes through mydata itself
# (data = mydata, mydata$...), and the attached copy will not pick up the later
# as.factor() conversion of mydata$Survived, so this call looks unnecessary -
# confirm nothing else relies on it before removing.
attach(mydata)
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
# Per-column summary statistics (n, mean, sd, median, skew, ...);
# presumably psych's describe(), since psych was loaded just above.
describe(mydata)
## vars n mean sd median trimmed mad min max range
## Survived 1 889 0.38 0.49 0.00 0.35 0.00 0.0 1.00 1.00
## Pclass 2 889 2.31 0.83 3.00 2.39 0.00 1.0 3.00 2.00
## Sex* 3 889 1.65 0.48 2.00 1.69 0.00 1.0 2.00 1.00
## Age 4 889 29.65 12.97 29.70 29.22 9.34 0.4 80.00 79.60
## SibSp 5 889 0.52 1.10 0.00 0.27 0.00 0.0 8.00 8.00
## Parch 6 889 0.38 0.81 0.00 0.19 0.00 0.0 6.00 6.00
## Fare 7 889 32.10 49.70 14.45 21.28 10.24 0.0 512.33 512.33
## Embarked* 8 889 2.54 0.79 3.00 2.67 0.00 1.0 3.00 2.00
## skew kurtosis se
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.27 0.03
## Sex* -0.62 -1.61 0.02
## Age 0.43 0.96 0.43
## SibSp 3.68 17.69 0.04
## Parch 2.74 9.66 0.03
## Fare 4.79 33.23 1.67
## Embarked* -1.26 -0.23 0.03
# Size of the data set: number of rows, then number of columns.
dim(mydata)
## [1] 889 8
# Total number of passengers (one row per passenger).
nrow(mydata)
## [1] 889
# Number of passengers who survived the sinking (Survived == 1).
# na.rm = TRUE mirrors subset(), which drops rows where the test is NA.
sum(mydata$Survived == 1, na.rm = TRUE)
## [1] 340
# One-way contingency table of the survival outcome (0 = died, 1 = survived).
# Survived is turned into a factor first so it is treated as categorical.
mydata[["Survived"]] <- factor(mydata[["Survived"]])
SurvivedTable <- table(mydata[["Survived"]])
SurvivedTable
##
## 0 1
## 549 340
# Proportion of passengers who died (0) and who survived (1).
prop <- SurvivedTable / sum(SurvivedTable)
# The same split expressed as percentages.
propPer <- 100 * prop
propPer
##
## 0 1
## 61.75478 38.24522
# Percentage of passengers who survived.
# Picked by the level label "1" rather than by position, so the lookup does not
# silently depend on the order of the factor levels.
propPer["1"]
## 1
## 38.24522
# Cross-tabulate the survival outcome against passenger class.
mytab <- xtabs(~ Survived + Pclass, data = mydata)
# Same counts with "Sum" totals appended to the rows and the columns.
addmargins(mytab, margin = seq_along(dim(mytab)))
## Pclass
## Survived 1 2 3 Sum
## 0 80 97 372 549
## 1 134 87 119 340
## Sum 214 184 491 889
# Number of first-class passengers who survived.
# na.rm = TRUE mirrors subset(), which drops rows where the test is NA.
sum(mydata$Survived == 1 & mydata$Pclass == 1, na.rm = TRUE)
## [1] 134
# Survival counts by passenger class, then the share of each outcome within
# every class (each column sums to 1).
surviversByClass <- xtabs(~ Survived + Pclass, data = mydata)
prop.table(surviversByClass, margin = 2)
## Pclass
## Survived 1 2 3
## 0 0.3738318 0.5271739 0.7576375
## 1 0.6261682 0.4728261 0.2423625
# Percentage of first-class passengers who survived.
# The cell is addressed by its labels (Survived = "1", Pclass = "1") instead of
# by position, so it stays correct if the table layout ever changes.
100 * prop.table(surviversByClass, margin = 2)["1", "1"]
## [1] 62.61682
# Three-way table: survival outcome by sex, with one slice per passenger class.
mytable1 <- xtabs(~ Survived + Sex + Pclass, data = mydata)
# Same counts with "Sum" totals added along every dimension.
addmargins(mytable1, margin = seq_along(dim(mytable1)))
## , , Pclass = 1
##
## Sex
## Survived female male Sum
## 0 3 77 80
## 1 89 45 134
## Sum 92 122 214
##
## , , Pclass = 2
##
## Sex
## Survived female male Sum
## 0 6 91 97
## 1 70 17 87
## Sum 76 108 184
##
## , , Pclass = 3
##
## Sex
## Survived female male Sum
## 0 72 300 372
## 1 72 47 119
## Sum 144 347 491
##
## , , Pclass = Sum
##
## Sex
## Survived female male Sum
## 0 81 468 549
## 1 231 109 340
## Sum 312 577 889
# Flatten the three-way table for display: Survived and Sex label the rows,
# Pclass labels the columns.
ftable(mytable1, col.vars = "Pclass")
## Pclass 1 2 3
## Survived Sex
## 0 female 3 6 72
## male 77 91 300
## 1 female 89 70 72
## male 45 17 47
# Each Survived x Sex x Pclass cell as a share of all passengers ...
ftab <- ftable(prop.table(mytable1))
# ... converted to percentages and shown to two decimal places.
ftabPer <- 100 * ftab
round(ftabPer, digits = 2)
## Pclass 1 2 3
## Survived Sex
## 0 female 0.34 0.67 8.10
## male 8.66 10.24 33.75
## 1 female 10.01 7.87 8.10
## male 5.06 1.91 5.29
# Number of first-class female passengers who survived.
# Looked up by label in the three-way table rather than by a linear position
# in the flattened table, which is hard to read and breaks if the layout changes.
mytable1["1", "female", "1"]
## [1] 89
# Two-way table of the survival outcome by sex.
SurvivedBySex <- xtabs(~ Survived + Sex, data = mydata)
SurvivedBySex
## Sex
## Survived female male
## 0 81 468
## 1 231 109
# Row-wise shares: within each survival outcome, the fraction of each sex.
propSur <- prop.table(SurvivedBySex, margin = 1)
# The same shares as percentages.
propSurper <- 100 * propSur
# Percentage of survivors who were female; the cell is addressed by label
# (Survived = "1", Sex = "female") instead of by position.
propSurper["1", "female"]
## [1] 67.94118
# Chi-squared test of independence between survival and sex. The result is
# stored once so the p-value below comes from the same test object instead of
# running the test a second time.
sexChisq <- chisq.test(SurvivedBySex)
sexChisq
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: SurvivedBySex
## X-squared = 258.43, df = 1, p-value < 2.2e-16
# Percentage of female survivors was higher than that of male survivors;
# the exact p-value shows how strongly the test rejects independence.
sexChisq$p.value
## [1] 3.77991e-58
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.