require(datasets)
require(ggvis)
## Loading required package: ggvis
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(magrittr)
## Loading required package: magrittr
# Download the Titanic survival CSV and wrap it as a dplyr table. The printed
# header shows four integer columns: Class, Age, Sex and Survive.
Titanicsurvival <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/datasets/titanicsurvival.csv",
  header = TRUE,
  sep = ","
) %>%
  tbl_df()
Titanicsurvival
## Source: local data frame [2,201 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
## 7 1 1 1 1
## 8 1 1 1 1
## 9 1 1 1 1
## 10 1 1 1 1
## .. ... ... ... ...
# Head counts: everyone in the table, then one count per Class code
# (0 = crew, 1-3 = passenger classes), Sex code (1 = male, 0 = female)
# and Age code (1 = adult, 0 = child).
TitanicPassengers <- Titanicsurvival %>% nrow()
TitanicPassengers
## [1] 2201
Crew <- Titanicsurvival %>%
  filter(Class == 0) %>%
  nrow()
Crew
## [1] 885
FirstClass <- Titanicsurvival %>%
  filter(Class == 1) %>%
  nrow()
FirstClass
## [1] 325
SecondClass <- Titanicsurvival %>%
  filter(Class == 2) %>%
  nrow()
SecondClass
## [1] 285
ThirdClass <- Titanicsurvival %>%
  filter(Class == 3) %>%
  nrow()
ThirdClass
## [1] 706
Male <- Titanicsurvival %>%
  filter(Sex == 1) %>%
  nrow()
Male
## [1] 1731
Female <- Titanicsurvival %>%
  filter(Sex == 0) %>%
  nrow()
Female
## [1] 470
Adult <- Titanicsurvival %>%
  filter(Age == 1) %>%
  nrow()
Adult
## [1] 2092
Child <- Titanicsurvival %>%
  filter(Age == 0) %>%
  nrow()
Child
## [1] 109
# Overall survival: keep the rows with Survive == 1, count them, and express
# that count as a percentage of everyone in the table.
TitanicSurvived <- Titanicsurvival %>% filter(Survive == 1)
TitanicSurvived1 <- TitanicSurvived %>% nrow()
TitanicSurvived1
## [1] 711
TitanicSurvived2 <- TitanicSurvived1 / TitanicPassengers * 100
TitanicSurvived2
## [1] 32.3035
### Crew (Class == 0)
# Titanic is the working copy of the data that the filters below read from.
Titanic <- Titanicsurvival %>% tbl_df()
# Crew members who survived.
TitanicSurvivor_C0 <- Titanic %>% filter(Class == 0, Survive == 1)
TitanicSurvivor_C0
## Source: local data frame [212 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 0 1 1 1
## 2 0 1 1 1
## 3 0 1 1 1
## 4 0 1 1 1
## 5 0 1 1 1
## 6 0 1 1 1
## 7 0 1 1 1
## 8 0 1 1 1
## 9 0 1 1 1
## 10 0 1 1 1
## .. ... ... ... ...
CrewSurvivor <- TitanicSurvivor_C0 %>% nrow()
CrewSurvivor
## [1] 212
# Surviving crew as a percentage of all crew.
PropCrewSurvivor <- CrewSurvivor / Crew * 100
PropCrewSurvivor
## [1] 23.9548
### 1st Class (Class == 1)
# First-class passengers who survived.
TitanicSurvivor_C1 <- Titanic %>% filter(Class == 1, Survive == 1)
TitanicSurvivor_C1
## Source: local data frame [203 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
## 7 1 1 1 1
## 8 1 1 1 1
## 9 1 1 1 1
## 10 1 1 1 1
## .. ... ... ... ...
FirstClassSurvivor <- TitanicSurvivor_C1 %>% nrow()
FirstClassSurvivor
## [1] 203
# Surviving first-class passengers as a percentage of all first class.
PropFirstClassSurvivor <- FirstClassSurvivor / FirstClass * 100
PropFirstClassSurvivor
## [1] 62.46154
### 2nd Class (Class == 2)
# Second-class passengers who survived.
TitanicSurvivor_C2 <- Titanic %>% filter(Class == 2, Survive == 1)
TitanicSurvivor_C2
## Source: local data frame [118 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 2 1 1 1
## 2 2 1 1 1
## 3 2 1 1 1
## 4 2 1 1 1
## 5 2 1 1 1
## 6 2 1 1 1
## 7 2 1 1 1
## 8 2 1 1 1
## 9 2 1 1 1
## 10 2 1 1 1
## .. ... ... ... ...
SecondClassSurvivor <- TitanicSurvivor_C2 %>% nrow()
SecondClassSurvivor
## [1] 118
# Surviving second-class passengers as a percentage of all second class.
PropSecondClassSurvivor <- SecondClassSurvivor / SecondClass * 100
PropSecondClassSurvivor
## [1] 41.40351
### 3rd Class (Class == 3)
# Third-class passengers who survived.
TitanicSurvivor_C3 <- Titanic %>% filter(Class == 3, Survive == 1)
TitanicSurvivor_C3
## Source: local data frame [178 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 3 1 1 1
## 2 3 1 1 1
## 3 3 1 1 1
## 4 3 1 1 1
## 5 3 1 1 1
## 6 3 1 1 1
## 7 3 1 1 1
## 8 3 1 1 1
## 9 3 1 1 1
## 10 3 1 1 1
## .. ... ... ... ...
ThirdClassSurvivor <- TitanicSurvivor_C3 %>% nrow()
ThirdClassSurvivor
## [1] 178
# Surviving third-class passengers as a percentage of all third class.
PropThirdClassSurvivor <- ThirdClassSurvivor / ThirdClass * 100
PropThirdClassSurvivor
## [1] 25.21246
#### Survival proportion of each class, in percent, printed side by side
#### in the order crew, 1st, 2nd, 3rd.
PropCrewSurvivor
## [1] 23.9548
PropFirstClassSurvivor
## [1] 62.46154
PropSecondClassSurvivor
## [1] 41.40351
PropThirdClassSurvivor
## [1] 25.21246
#### Male (Sex == 1)
# Titanic is re-derived from Titanicsurvival before filtering.
Titanic <- Titanicsurvival %>% tbl_df()
# Males who survived.
TitanicSurvivor_M <- Titanic %>% filter(Sex == 1, Survive == 1)
TitanicSurvivor_M
## Source: local data frame [367 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
## 7 1 1 1 1
## 8 1 1 1 1
## 9 1 1 1 1
## 10 1 1 1 1
## .. ... ... ... ...
MaleSurvivor <- TitanicSurvivor_M %>% nrow()
MaleSurvivor
## [1] 367
# Surviving males as a percentage of all males.
PropMaleSurvivor <- MaleSurvivor / Male * 100
PropMaleSurvivor
## [1] 21.20162
#### Female (Sex == 0)
# Titanic is re-derived from Titanicsurvival before filtering.
Titanic <- Titanicsurvival %>% tbl_df()
# Females who survived.
TitanicSurvivor_F <- Titanic %>% filter(Sex == 0, Survive == 1)
TitanicSurvivor_F
## Source: local data frame [344 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 0 1
## 2 1 1 0 1
## 3 1 1 0 1
## 4 1 1 0 1
## 5 1 1 0 1
## 6 1 1 0 1
## 7 1 1 0 1
## 8 1 1 0 1
## 9 1 1 0 1
## 10 1 1 0 1
## .. ... ... ... ...
FemaleSurvivor <- TitanicSurvivor_F %>% nrow()
FemaleSurvivor
## [1] 344
# Surviving females as a percentage of all females.
PropFemaleSurvivor <- FemaleSurvivor / Female * 100
PropFemaleSurvivor
## [1] 73.19149
#### Survival proportion of each Sex, in percent
PropMaleSurvivor
## [1] 21.20162
PropFemaleSurvivor
## [1] 73.19149
# Name the sex with the higher survival percentage. The comparison is a single
# value, so plain if/else is used instead of the vectorised ifelse(). An NA
# comparison still yields NA, as ifelse() did, and a tie falls through to the
# "Female" message, as before.
highestsurvivalrate <- if (is.na(PropMaleSurvivor > PropFemaleSurvivor)) {
  NA
} else if (PropMaleSurvivor > PropFemaleSurvivor) {
  "Male have the highest survival rate"
} else {
  "Female have the highest survivalrate"
}
highestsurvivalrate
## [1] "Female have the highest survivalrate"
#### Adult (Age == 1)
# Titanic is re-derived from Titanicsurvival before filtering.
Titanic <- Titanicsurvival %>% tbl_df()
# Adults who survived.
TitanicSurvivor_A <- Titanic %>% filter(Age == 1, Survive == 1)
TitanicSurvivor_A
## Source: local data frame [654 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
## 7 1 1 1 1
## 8 1 1 1 1
## 9 1 1 1 1
## 10 1 1 1 1
## .. ... ... ... ...
AdultSurvivor <- TitanicSurvivor_A %>% nrow()
AdultSurvivor
## [1] 654
# Surviving adults as a percentage of all adults.
PropAdultSurvivor <- AdultSurvivor / Adult * 100
PropAdultSurvivor
## [1] 31.26195
#### Child (Age == 0)
# Titanic is re-derived from Titanicsurvival before filtering.
Titanic <- Titanicsurvival %>% tbl_df()
# Children who survived.
TitanicSurvivor_C <- Titanic %>% filter(Age == 0, Survive == 1)
TitanicSurvivor_C
## Source: local data frame [57 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 0 1 1
## 2 1 0 1 1
## 3 1 0 1 1
## 4 1 0 1 1
## 5 1 0 1 1
## 6 1 0 0 1
## 7 2 0 1 1
## 8 2 0 1 1
## 9 2 0 1 1
## 10 2 0 1 1
## .. ... ... ... ...
ChildSurvivor <- TitanicSurvivor_C %>% nrow()
ChildSurvivor
## [1] 57
# Surviving children as a percentage of all children.
PropChildSurvivor <- ChildSurvivor / Child * 100
PropChildSurvivor
## [1] 52.29358
#### Survival proportion of each Age, in percent
PropAdultSurvivor
## [1] 31.26195
PropChildSurvivor
## [1] 52.29358
# Name the age group with the lower survival percentage. The comparison is a
# single value, so plain if/else is used instead of the vectorised ifelse().
# An NA comparison still yields NA, as ifelse() did, and a tie falls through
# to the "Child" message, as before.
Agelowestsurvivalrate <- if (is.na(PropAdultSurvivor < PropChildSurvivor)) {
  NA
} else if (PropAdultSurvivor < PropChildSurvivor) {
  "Adults have the Age lowest survival rate"
} else {
  "Child have the Age lowest survival rate"
}
Agelowestsurvivalrate
## [1] "Adults have the Age lowest survival rate"
# Survival percentage inside each Age x Sex cell. The *MM / *FF tables hold
# everyone in the cell (the denominator); the two-letter names end up holding
# the percentage of that cell with Survive == 1.

# AdM is Adult Male
AdMM <- Titanic %>% filter(Age == 1, Sex == 1)
AdM <- nrow(filter(AdMM, Survive == 1)) / nrow(AdMM) * 100
AdM # survival percentage for the Adult/Male category
## [1] 20.27594

# ChM is Child Male
ChMM <- Titanic %>% filter(Age == 0, Sex == 1)
ChM <- nrow(filter(ChMM, Survive == 1)) / nrow(ChMM) * 100
ChM # survival percentage for the Child/Male category
## [1] 45.3125

# AdF is Adult Female
AdFF <- Titanic %>% filter(Age == 1, Sex == 0)
AdF <- nrow(filter(AdFF, Survive == 1)) / nrow(AdFF) * 100
AdF # survival percentage for the Adult/Female category
## [1] 74.35294

# ChF is Child Female
ChFF <- Titanic %>% filter(Age == 0, Sex == 0)
ChF <- nrow(filter(ChFF, Survive == 1)) / nrow(ChFF) * 100
ChF # survival percentage for the Child/Female category
## [1] 62.22222

#### Survival proportion of each age/sex category
AdM
## [1] 20.27594
ChM
## [1] 45.3125
AdF
## [1] 74.35294
ChF
## [1] 62.22222
#### Adult females have the highest survival rate; adult males have the lowest.
# Tabulate Age x Sex x Survive counts for one group of people and add `freq`,
# each row's percentage share of its Age/Sex subgroup.
#
# passengers: a table with Age, Sex and Survive columns.
# Returns the summary table with columns Age, Sex, Survive, number and freq.
survival_by_age_sex <- function(passengers) {
  passengers %>%
    group_by(Age, Sex, Survive) %>%
    summarise(number = n()) %>%
    # The summary is still grouped by Age and Sex at this point (see the
    # "Groups: Age, Sex" line in the printed output), so sum(number) is the
    # Age/Sex subtotal rather than the class total.
    mutate(freq = (number / sum(number) * 100))
}

# Note: from here on the TitanicSurvivor_C* names are reassigned to everyone
# in the class (no Survive condition), not only the survivors.

# Crew (Class == 0)
TitanicSurvivor_C0 <- filter(Titanic, Class == 0)
survival_by_age_sex(TitanicSurvivor_C0)
## Source: local data frame [4 x 5]
## Groups: Age, Sex [2]
##
## Age Sex Survive number freq
## (int) (int) (int) (int) (dbl)
## 1 1 0 0 3 13.04348
## 2 1 0 1 20 86.95652
## 3 1 1 0 670 77.72622
## 4 1 1 1 192 22.27378

# 1st class (Class == 1)
TitanicSurvivor_C1 <- filter(Titanic, Class == 1)
survival_by_age_sex(TitanicSurvivor_C1)
## Source: local data frame [6 x 5]
## Groups: Age, Sex [4]
##
## Age Sex Survive number freq
## (int) (int) (int) (int) (dbl)
## 1 0 0 1 1 100.000000
## 2 0 1 1 5 100.000000
## 3 1 0 0 4 2.777778
## 4 1 0 1 140 97.222222
## 5 1 1 0 118 67.428571
## 6 1 1 1 57 32.571429

# 2nd class (Class == 2)
TitanicSurvivor_C2 <- filter(Titanic, Class == 2)
survival_by_age_sex(TitanicSurvivor_C2)
## Source: local data frame [6 x 5]
## Groups: Age, Sex [4]
##
## Age Sex Survive number freq
## (int) (int) (int) (int) (dbl)
## 1 0 0 1 13 100.000000
## 2 0 1 1 11 100.000000
## 3 1 0 0 13 13.978495
## 4 1 0 1 80 86.021505
## 5 1 1 0 154 91.666667
## 6 1 1 1 14 8.333333

# 3rd class (Class == 3)
TitanicSurvivor_C3 <- filter(Titanic, Class == 3)
survival_by_age_sex(TitanicSurvivor_C3)
## Source: local data frame [8 x 5]
## Groups: Age, Sex [4]
##
## Age Sex Survive number freq
## (int) (int) (int) (int) (dbl)
## 1 0 0 0 17 54.83871
## 2 0 0 1 14 45.16129
## 3 0 1 0 35 72.91667
## 4 0 1 1 13 27.08333
## 5 1 0 0 89 53.93939
## 6 1 0 1 76 46.06061
## 7 1 1 0 387 83.76623
## 8 1 1 1 75 16.23377
# Print the four per-class tables. Each name was reassigned above to
# filter(Titanic, Class == k) with no Survive condition, so despite the
# "Survivor" in the names these are the full class rosters (885, 325, 285 and
# 706 rows), not survivor-only tables.
TitanicSurvivor_C0
## Source: local data frame [885 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 0 1 1 1
## 2 0 1 1 1
## 3 0 1 1 1
## 4 0 1 1 1
## 5 0 1 1 1
## 6 0 1 1 1
## 7 0 1 1 1
## 8 0 1 1 1
## 9 0 1 1 1
## 10 0 1 1 1
## .. ... ... ... ...
TitanicSurvivor_C1
## Source: local data frame [325 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
## 7 1 1 1 1
## 8 1 1 1 1
## 9 1 1 1 1
## 10 1 1 1 1
## .. ... ... ... ...
TitanicSurvivor_C2
## Source: local data frame [285 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 2 1 1 1
## 2 2 1 1 1
## 3 2 1 1 1
## 4 2 1 1 1
## 5 2 1 1 1
## 6 2 1 1 1
## 7 2 1 1 1
## 8 2 1 1 1
## 9 2 1 1 1
## 10 2 1 1 1
## .. ... ... ... ...
TitanicSurvivor_C3
## Source: local data frame [706 x 4]
##
## Class Age Sex Survive
## (int) (int) (int) (int)
## 1 3 1 1 1
## 2 3 1 1 1
## 3 3 1 1 1
## 4 3 1 1 1
## 5 3 1 1 1
## 6 3 1 1 1
## 7 3 1 1 1
## 8 3 1 1 1
## 9 3 1 1 1
## 10 3 1 1 1
## .. ... ... ... ...
I get the following summary results. By class, first class had the highest survival rate. By sex, females had the higher survival rate. By age, children had the higher survival rate. Across the age/sex groups, adult females had the highest survival rate and adult males had the lowest.