require(datasets)
require(ggvis)
## Loading required package: ggvis
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(magrittr)
## Loading required package: magrittr

1. Read a .csv file containing data elements about Titanic travelers

# Download the Titanic records (one row per person; integer-coded columns
# Class, Age, Sex and Survive) and keep them as a tibble for compact printing.
Titanicsurvival <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/datasets/titanicsurvival.csv",
  header = TRUE,
  sep = ","
)
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanicsurvival <- as_tibble(Titanicsurvival)
Titanicsurvival
## Source: local data frame [2,201 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     1       1
## 2      1     1     1       1
## 3      1     1     1       1
## 4      1     1     1       1
## 5      1     1     1       1
## 6      1     1     1       1
## 7      1     1     1       1
## 8      1     1     1       1
## 9      1     1     1       1
## 10     1     1     1       1
## ..   ...   ...   ...     ...

2. Calculate the total number of passengers in the dataset.

# Every row is one person on board, so the row count is the head-count.
TitanicPassengers <- Titanicsurvival %>% nrow()
TitanicPassengers
## [1] 2201

The following counts are useful for the remaining questions:

# Head-counts per category, reused as denominators for the survival rates
# computed later. Coding implied by the variable names: Class 0 = crew,
# 1-3 = passenger class; Sex 1 = male, 0 = female; Age 1 = adult, 0 = child.
Crew <- Titanicsurvival %>% filter(Class == 0) %>% nrow()
Crew
## [1] 885
FirstClass <- Titanicsurvival %>% filter(Class == 1) %>% nrow()
FirstClass
## [1] 325
SecondClass <- Titanicsurvival %>% filter(Class == 2) %>% nrow()
SecondClass
## [1] 285
ThirdClass <- Titanicsurvival %>% filter(Class == 3) %>% nrow()
ThirdClass
## [1] 706
Male <- Titanicsurvival %>% filter(Sex == 1) %>% nrow()
Male
## [1] 1731
Female <- Titanicsurvival %>% filter(Sex == 0) %>% nrow()
Female
## [1] 470
Adult <- Titanicsurvival %>% filter(Age == 1) %>% nrow()
Adult
## [1] 2092
Child <- Titanicsurvival %>% filter(Age == 0) %>% nrow()
Child
## [1] 109

3. Calculate the total proportion of passengers surviving.

# Overall survival: rows with Survive == 1 as a percentage of all records.
TitanicSurvived <- Titanicsurvival %>% filter(Survive == 1)
TitanicSurvived1 <- nrow(TitanicSurvived)
TitanicSurvived1
## [1] 711
# TitanicPassengers is the total head-count computed earlier in the script.
TitanicSurvived2 <- TitanicSurvived1 / TitanicPassengers * 100
TitanicSurvived2
## [1] 32.3035

4. Calculate the proportion of passengers surviving for each class of passenger.

### Crew
# Survival rate among crew members (Class == 0), in percent.
# Titanic is the working copy that the later filters read from.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanic <- as_tibble(Titanicsurvival)
TitanicSurvivor_C0 <- Titanic %>%
  filter(Class == 0, Survive == 1)
TitanicSurvivor_C0
## Source: local data frame [212 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      0     1     1       1
## 2      0     1     1       1
## 3      0     1     1       1
## 4      0     1     1       1
## 5      0     1     1       1
## 6      0     1     1       1
## 7      0     1     1       1
## 8      0     1     1       1
## 9      0     1     1       1
## 10     0     1     1       1
## ..   ...   ...   ...     ...
CrewSurvivor <- nrow(TitanicSurvivor_C0)
CrewSurvivor
## [1] 212
# Crew is the crew head-count computed earlier in the script.
PropCrewSurvivor <- CrewSurvivor / Crew * 100
PropCrewSurvivor
## [1] 23.9548
### 1st Class
# Survival rate among first-class passengers (Class == 1), in percent.
TitanicSurvivor_C1 <- Titanic %>%
  filter(Class == 1, Survive == 1)
TitanicSurvivor_C1
## Source: local data frame [203 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     1       1
## 2      1     1     1       1
## 3      1     1     1       1
## 4      1     1     1       1
## 5      1     1     1       1
## 6      1     1     1       1
## 7      1     1     1       1
## 8      1     1     1       1
## 9      1     1     1       1
## 10     1     1     1       1
## ..   ...   ...   ...     ...
FirstClassSurvivor <- nrow(TitanicSurvivor_C1)
FirstClassSurvivor
## [1] 203
# FirstClass is the first-class head-count computed earlier in the script.
PropFirstClassSurvivor <- FirstClassSurvivor / FirstClass * 100
PropFirstClassSurvivor
## [1] 62.46154
### 2nd Class
# Survival rate among second-class passengers (Class == 2), in percent.
TitanicSurvivor_C2 <- Titanic %>%
  filter(Class == 2, Survive == 1)
TitanicSurvivor_C2
## Source: local data frame [118 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      2     1     1       1
## 2      2     1     1       1
## 3      2     1     1       1
## 4      2     1     1       1
## 5      2     1     1       1
## 6      2     1     1       1
## 7      2     1     1       1
## 8      2     1     1       1
## 9      2     1     1       1
## 10     2     1     1       1
## ..   ...   ...   ...     ...
SecondClassSurvivor <- nrow(TitanicSurvivor_C2)
SecondClassSurvivor
## [1] 118
# SecondClass is the second-class head-count computed earlier in the script.
PropSecondClassSurvivor <- SecondClassSurvivor / SecondClass * 100
PropSecondClassSurvivor
## [1] 41.40351
### 3rd Class
# Survival rate among third-class passengers (Class == 3), in percent.
TitanicSurvivor_C3 <- Titanic %>%
  filter(Class == 3, Survive == 1)
TitanicSurvivor_C3
## Source: local data frame [178 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      3     1     1       1
## 2      3     1     1       1
## 3      3     1     1       1
## 4      3     1     1       1
## 5      3     1     1       1
## 6      3     1     1       1
## 7      3     1     1       1
## 8      3     1     1       1
## 9      3     1     1       1
## 10     3     1     1       1
## ..   ...   ...   ...     ...
ThirdClassSurvivor <- nrow(TitanicSurvivor_C3)
ThirdClassSurvivor
## [1] 178
# ThirdClass is the third-class head-count computed earlier in the script.
PropThirdClassSurvivor <- ThirdClassSurvivor / ThirdClass * 100
PropThirdClassSurvivor
## [1] 25.21246
#### Survival rate of each class, in percent (crew, 1st, 2nd, 3rd class).
# Each value is survivors in the class divided by the class head-count, x 100.
PropCrewSurvivor
## [1] 23.9548
PropFirstClassSurvivor
## [1] 62.46154
PropSecondClassSurvivor
## [1] 41.40351
PropThirdClassSurvivor
## [1] 25.21246

5. Calculate the proportion of passengers surviving for each sex category. Which sex had the highest survival rate?

#### Male
# Survival rate among males (Sex == 1), in percent.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanic <- as_tibble(Titanicsurvival)
TitanicSurvivor_M <- Titanic %>%
  filter(Sex == 1, Survive == 1)
TitanicSurvivor_M
## Source: local data frame [367 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     1       1
## 2      1     1     1       1
## 3      1     1     1       1
## 4      1     1     1       1
## 5      1     1     1       1
## 6      1     1     1       1
## 7      1     1     1       1
## 8      1     1     1       1
## 9      1     1     1       1
## 10     1     1     1       1
## ..   ...   ...   ...     ...
MaleSurvivor <- nrow(TitanicSurvivor_M)
MaleSurvivor
## [1] 367
# Male is the male head-count computed earlier in the script.
PropMaleSurvivor <- MaleSurvivor / Male * 100
PropMaleSurvivor
## [1] 21.20162
#### Female
# Survival rate among females (Sex == 0), in percent.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanic <- as_tibble(Titanicsurvival)
TitanicSurvivor_F <- Titanic %>%
  filter(Sex == 0, Survive == 1)
TitanicSurvivor_F
## Source: local data frame [344 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     0       1
## 2      1     1     0       1
## 3      1     1     0       1
## 4      1     1     0       1
## 5      1     1     0       1
## 6      1     1     0       1
## 7      1     1     0       1
## 8      1     1     0       1
## 9      1     1     0       1
## 10     1     1     0       1
## ..   ...   ...   ...     ...
FemaleSurvivor <- nrow(TitanicSurvivor_F)
FemaleSurvivor
## [1] 344
# Female is the female head-count computed earlier in the script.
PropFemaleSurvivor <- FemaleSurvivor / Female * 100
PropFemaleSurvivor
## [1] 73.19149
#### Survival rate of each sex, in percent
PropMaleSurvivor
## [1] 21.20162
PropFemaleSurvivor
## [1] 73.19149
# Both rates are single numbers, so a plain if/else is used instead of the
# vectorised ifelse(). The message strings are kept exactly as before.
highestsurvivalrate <- if (PropMaleSurvivor > PropFemaleSurvivor) {
  "Male have the highest survival rate"
} else {
  "Female have the highest survivalrate"
}
highestsurvivalrate
## [1] "Female have the highest survivalrate"

6. Calculate the proportion of passengers surviving for each age category. Which age had the lowest survival rate?

#### Adult
# Survival rate among adults (Age == 1), in percent.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanic <- as_tibble(Titanicsurvival)
TitanicSurvivor_A <- Titanic %>%
  filter(Age == 1, Survive == 1)
TitanicSurvivor_A
## Source: local data frame [654 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     1       1
## 2      1     1     1       1
## 3      1     1     1       1
## 4      1     1     1       1
## 5      1     1     1       1
## 6      1     1     1       1
## 7      1     1     1       1
## 8      1     1     1       1
## 9      1     1     1       1
## 10     1     1     1       1
## ..   ...   ...   ...     ...
AdultSurvivor <- nrow(TitanicSurvivor_A)
AdultSurvivor
## [1] 654
# Adult is the adult head-count computed earlier in the script.
PropAdultSurvivor <- AdultSurvivor / Adult * 100
PropAdultSurvivor
## [1] 31.26195
#### Child
# Survival rate among children (Age == 0), in percent.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
Titanic <- as_tibble(Titanicsurvival)
TitanicSurvivor_C <- Titanic %>%
  filter(Age == 0, Survive == 1)
TitanicSurvivor_C
## Source: local data frame [57 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     0     1       1
## 2      1     0     1       1
## 3      1     0     1       1
## 4      1     0     1       1
## 5      1     0     1       1
## 6      1     0     0       1
## 7      2     0     1       1
## 8      2     0     1       1
## 9      2     0     1       1
## 10     2     0     1       1
## ..   ...   ...   ...     ...
ChildSurvivor <- nrow(TitanicSurvivor_C)
ChildSurvivor
## [1] 57
# Child is the child head-count computed earlier in the script.
PropChildSurvivor <- ChildSurvivor / Child * 100
PropChildSurvivor
## [1] 52.29358
#### Survival rate of each age group, in percent
PropAdultSurvivor
## [1] 31.26195
PropChildSurvivor
## [1] 52.29358
# Both rates are single numbers, so a plain if/else is used instead of the
# vectorised ifelse(). The message strings are kept exactly as before.
Agelowestsurvivalrate <- if (PropAdultSurvivor < PropChildSurvivor) {
  "Adults have the Age lowest survival rate"
} else {
  "Child have the Age lowest survival rate"
}
Agelowestsurvivalrate
## [1] "Adults have the Age lowest survival rate"

7. Calculate the proportion of passengers surviving for each age/sex category (i.e., for adult males, child males, adult females, child females). Which group was most likely to survive? Least likely?

# Survival rate (%) for each Age x Sex group. For every group, the *MM / *FF
# table holds all members of the group; the survivors are its Survive == 1
# rows, and the rate is survivors / members * 100.

# Adult males (Age == 1, Sex == 1)
AdMM <- Titanic %>% filter(Age == 1, Sex == 1)
AdM <- nrow(filter(AdMM, Survive == 1)) / nrow(AdMM) * 100
AdM # survival rate (%) of adult males
## [1] 20.27594
# Male children (Age == 0, Sex == 1)
ChMM <- Titanic %>% filter(Age == 0, Sex == 1)
ChM <- nrow(filter(ChMM, Survive == 1)) / nrow(ChMM) * 100
ChM # survival rate (%) of male children
## [1] 45.3125
# Adult females (Age == 1, Sex == 0)
AdFF <- Titanic %>% filter(Age == 1, Sex == 0)
AdF <- nrow(filter(AdFF, Survive == 1)) / nrow(AdFF) * 100
AdF # survival rate (%) of adult females
## [1] 74.35294
# Female children (Age == 0, Sex == 0)
ChFF <- Titanic %>% filter(Age == 0, Sex == 0)
ChF <- nrow(filter(ChFF, Survive == 1)) / nrow(ChFF) * 100
ChF # survival rate (%) of female children
## [1] 62.22222
#### Survival rate (%) of each age/sex category
AdM
## [1] 20.27594
ChM
## [1] 45.3125
AdF
## [1] 74.35294
ChF
## [1] 62.22222
#### Adult females have the highest survival rate; adult males have the lowest.

8. Calculate the proportion of passengers surviving for each age/sex/class category. Which group had the highest mortality in this disaster? Why?

# All crew members (Class == 0), survivors and non-survivors alike.
# NOTE: this reuses the name TitanicSurvivor_C0, which earlier in the script
# held crew survivors only.
TitanicSurvivor_C0 <- Titanic %>% filter(Class == 0)
# Count each Age/Sex/Survive combination. summarise() drops the last grouping
# level (Survive), so freq is the percentage within each Age/Sex group.
TitanicSurvivor_C0 %>%
  group_by(Age, Sex, Survive) %>%
  summarise(number = n()) %>%
  mutate(freq = number / sum(number) * 100)
## Source: local data frame [4 x 5]
## Groups: Age, Sex [2]
## 
##     Age   Sex Survive number     freq
##   (int) (int)   (int)  (int)    (dbl)
## 1     1     0       0      3 13.04348
## 2     1     0       1     20 86.95652
## 3     1     1       0    670 77.72622
## 4     1     1       1    192 22.27378
# All first-class passengers (Class == 1), survivors and non-survivors alike.
# NOTE: this reuses the name TitanicSurvivor_C1, which earlier in the script
# held first-class survivors only.
TitanicSurvivor_C1 <- Titanic %>% filter(Class == 1)
# Count each Age/Sex/Survive combination. summarise() drops the last grouping
# level (Survive), so freq is the percentage within each Age/Sex group.
TitanicSurvivor_C1 %>%
  group_by(Age, Sex, Survive) %>%
  summarise(number = n()) %>%
  mutate(freq = number / sum(number) * 100)
## Source: local data frame [6 x 5]
## Groups: Age, Sex [4]
## 
##     Age   Sex Survive number       freq
##   (int) (int)   (int)  (int)      (dbl)
## 1     0     0       1      1 100.000000
## 2     0     1       1      5 100.000000
## 3     1     0       0      4   2.777778
## 4     1     0       1    140  97.222222
## 5     1     1       0    118  67.428571
## 6     1     1       1     57  32.571429
# All second-class passengers (Class == 2), survivors and non-survivors alike.
# NOTE: this reuses the name TitanicSurvivor_C2, which earlier in the script
# held second-class survivors only.
TitanicSurvivor_C2 <- Titanic %>% filter(Class == 2)
# Count each Age/Sex/Survive combination. summarise() drops the last grouping
# level (Survive), so freq is the percentage within each Age/Sex group.
TitanicSurvivor_C2 %>%
  group_by(Age, Sex, Survive) %>%
  summarise(number = n()) %>%
  mutate(freq = number / sum(number) * 100)
## Source: local data frame [6 x 5]
## Groups: Age, Sex [4]
## 
##     Age   Sex Survive number       freq
##   (int) (int)   (int)  (int)      (dbl)
## 1     0     0       1     13 100.000000
## 2     0     1       1     11 100.000000
## 3     1     0       0     13  13.978495
## 4     1     0       1     80  86.021505
## 5     1     1       0    154  91.666667
## 6     1     1       1     14   8.333333
# All third-class passengers (Class == 3), survivors and non-survivors alike.
# NOTE: this reuses the name TitanicSurvivor_C3, which earlier in the script
# held third-class survivors only.
TitanicSurvivor_C3 <- Titanic %>% filter(Class == 3)
# Count each Age/Sex/Survive combination. summarise() drops the last grouping
# level (Survive), so freq is the percentage within each Age/Sex group.
TitanicSurvivor_C3 %>%
  group_by(Age, Sex, Survive) %>%
  summarise(number = n()) %>%
  mutate(freq = number / sum(number) * 100)
## Source: local data frame [8 x 5]
## Groups: Age, Sex [4]
## 
##     Age   Sex Survive number     freq
##   (int) (int)   (int)  (int)    (dbl)
## 1     0     0       0     17 54.83871
## 2     0     0       1     14 45.16129
## 3     0     1       0     35 72.91667
## 4     0     1       1     13 27.08333
## 5     1     0       0     89 53.93939
## 6     1     0       1     76 46.06061
## 7     1     1       0    387 83.76623
## 8     1     1       1     75 16.23377
# Print the crew table (Class == 0); at this point it holds every crew record,
# not only survivors.
TitanicSurvivor_C0
## Source: local data frame [885 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      0     1     1       1
## 2      0     1     1       1
## 3      0     1     1       1
## 4      0     1     1       1
## 5      0     1     1       1
## 6      0     1     1       1
## 7      0     1     1       1
## 8      0     1     1       1
## 9      0     1     1       1
## 10     0     1     1       1
## ..   ...   ...   ...     ...
# Print the first-class table (Class == 1); at this point it holds every
# first-class record, not only survivors.
TitanicSurvivor_C1
## Source: local data frame [325 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      1     1     1       1
## 2      1     1     1       1
## 3      1     1     1       1
## 4      1     1     1       1
## 5      1     1     1       1
## 6      1     1     1       1
## 7      1     1     1       1
## 8      1     1     1       1
## 9      1     1     1       1
## 10     1     1     1       1
## ..   ...   ...   ...     ...
# Print the second-class table (Class == 2); at this point it holds every
# second-class record, not only survivors.
TitanicSurvivor_C2
## Source: local data frame [285 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      2     1     1       1
## 2      2     1     1       1
## 3      2     1     1       1
## 4      2     1     1       1
## 5      2     1     1       1
## 6      2     1     1       1
## 7      2     1     1       1
## 8      2     1     1       1
## 9      2     1     1       1
## 10     2     1     1       1
## ..   ...   ...   ...     ...
# Print the third-class table (Class == 3); at this point it holds every
# third-class record, not only survivors.
TitanicSurvivor_C3
## Source: local data frame [706 x 4]
## 
##    Class   Age   Sex Survive
##    (int) (int) (int)   (int)
## 1      3     1     1       1
## 2      3     1     1       1
## 3      3     1     1       1
## 4      3     1     1       1
## 5      3     1     1       1
## 6      3     1     1       1
## 7      3     1     1       1
## 8      3     1     1       1
## 9      3     1     1       1
## 10     3     1     1       1
## ..   ...   ...   ...     ...

The group with the highest mortality in this disaster was second-class adult males: their survival rate was only 8.33%, which is the lowest survival rate of any group. I think the reason might be that these men sacrificed themselves to save others' lives.

9. Write a summary.

I obtained the following summary results. In terms of class, first-class passengers had the highest survival rate. In terms of sex, females had the higher survival rate. In terms of age, children had the higher survival rate. Among the age/sex groups, adult females had the highest survival rate, whereas adult males had the lowest.