library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#### Task 1 Import titanic540 csv file
# Read the Titanic passenger CSV directly from its URL into a data frame.
csvtitanic <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/titanic540.csv"
)
Task 2 Convert the titanic540.csv data frame to a “tibble”
# Convert the data frame to a tibble so it prints compactly (first rows and
# column types only). as_tibble() replaces the deprecated tbl_df().
csvtitanic <- as_tibble(csvtitanic)
csvtitanic
## # A tibble: 1,309 x 8
## ï..pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 male 1 1 2 151.55 S
## 3 1 0 female 2 1 2 151.55 S
## 4 1 0 male 30 1 2 151.55 S
## 5 1 0 female 25 1 2 151.55 S
## 6 1 1 male 48 0 0 26.55 S
## 7 1 1 female 63 1 0 77.96 S
## 8 1 0 male 39 0 0 0.00 S
## 9 1 1 female 53 2 0 51.48 S
## 10 1 0 male 71 0 0 49.50 C
## # ... with 1,299 more rows
Task 3 Calculate the proportion of surviving passengers
# Proportion of all passengers who survived.
# `survived` is a 0/1 indicator, so the mean of `survived == 1` is the share
# of survivors. The earlier `select(survived = 1)` merely picked the first
# column (passenger class) and relabelled it "survived".
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
select.surviving <- csvtitanic %>%
  summarise(prop.survived = mean(survived == 1, na.rm = TRUE))
select.surviving
## # A tibble: 1,309 x 1
## survived
## <int>
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## # ... with 1,299 more rows
Task 4 Calculate the proportion of surviving passengers by sex
# Proportion of survivors within each sex.
# The earlier `select(csvtitanic$sex)` passed the factor's integer codes as
# column positions, so it returned the first two columns instead of a
# per-sex survival rate.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  group_by(sex) %>%
  summarise(prop.survived = mean(survived == 1, na.rm = TRUE))
## # A tibble: 1,309 x 2
## ï..pclass survived
## <int> <int>
## 1 1 1
## 2 1 1
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 1
## 7 1 1
## 8 1 0
## 9 1 1
## 10 1 0
## # ... with 1,299 more rows
Task 5 Calculate the average age of surviving female passengers
# Average age of surviving female passengers.
# Fixes three problems in the earlier version: it never restricted to
# survivors, it used the bare `age` column as a filter predicate, and it
# misspelled `na.rm` as `ne.rm` (which mean() silently ignores).
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  filter(survived == 1, sex == "female") %>%
  summarise(mean.age = mean(age, na.rm = TRUE))
## # A tibble: 1 x 1
## mean.age
## <dbl>
## 1 28.76744
Task 6 Calculate the number of survivors less than 10 years of age
# Survivors younger than 10, followed by how many there are.
# filter() conditions are written as explicit logical comparisons; the
# earlier version passed the integer `survived` column and `n()` as
# predicates, which relies on implicit coercion and is rejected by current
# dplyr releases.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
filter.survivers <- csvtitanic %>%
  filter(survived == 1, age < 10)
nrow(filter.survivers)
## # A tibble: 50 x 8
## ï..pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 male 1 1 2 151.55 S
## 2 1 1 male 4 0 2 81.86 S
## 3 1 1 male 6 0 2 134.50 C
## 4 2 1 male 1 2 1 39.00 S
## 5 2 1 female 4 2 1 39.00 S
## 6 2 1 male 1 0 2 29.00 S
## 7 2 1 female 8 0 2 26.25 S
## 8 2 1 male 8 1 1 36.75 S
## 9 2 1 male 8 0 2 32.50 S
## 10 2 1 male 1 1 1 14.50 S
## # ... with 40 more rows
Task 7 Calculate the min, max, and avg age of surviving passengers 10 years old or older
# Minimum, maximum and mean age of survivors aged 10 or older.
# `age >= 10` includes 10-year-olds (the earlier `age > 10` excluded them),
# and survival is tested explicitly rather than by coercing the integer
# column to logical.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  filter(survived == 1, age >= 10) %>%
  summarise(
    min.age = min(age, na.rm = TRUE),
    max.age = max(age, na.rm = TRUE),
    avg.age = mean(age, na.rm = TRUE)
  )
## # A tibble: 1 x 3
## min.age max.age avg.age
## <dbl> <dbl> <dbl>
## 1 11 80 32.26525
Task 8 Calculate the proportion of passengers by port of embarkation
# Share of all passengers who boarded at each port.
# count() tallies passengers per port and returns an ungrouped table, so
# sum(n) is the overall passenger total. The earlier version only called
# group_by(), which prints the data unchanged and computes nothing.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  count(embarked) %>%
  mutate(prop.passengers = n / sum(n))
## # A tibble: 1,309 x 8
## # Groups: embarked, survived [7]
## ï..pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 male 1 1 2 151.55 S
## 3 1 0 female 2 1 2 151.55 S
## 4 1 0 male 30 1 2 151.55 S
## 5 1 0 female 25 1 2 151.55 S
## 6 1 1 male 48 0 0 26.55 S
## 7 1 1 female 63 1 0 77.96 S
## 8 1 0 male 39 0 0 0.00 S
## 9 1 1 female 53 2 0 51.48 S
## 10 1 0 male 71 0 0 49.50 C
## # ... with 1,299 more rows
Task 9 Calculate the number of surviving female passengers over 40 years
of age by port
# Number of surviving female passengers older than 40, per port.
# The earlier version computed the mean age of all female passengers; it had
# no survivor filter, no age threshold and no grouping by port.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  filter(survived == 1, sex == "female", age > 40) %>%
  count(embarked)
## # A tibble: 1 x 1
## mean.age
## <dbl>
## 1 28.76744
Task 10 Calculate the avg. fare by port
# Mean fare for each value of `embarked`; missing fares are ignored.
summarise(
  group_by(csvtitanic, embarked),
  mean.fare = mean(fare, na.rm = TRUE)
)
## # A tibble: 4 x 2
## embarked mean.fare
## <fctr> <dbl>
## 1 80.00000
## 2 C 62.33719
## 3 Q 12.40935
## 4 S 27.41963
Task 11 Calculate the number of surviving passengers who had siblings
# Number of survivors with at least one sibling/spouse aboard (sibsp > 0).
# The condition is an explicit comparison instead of the bare integer
# column, and the rows are counted rather than printed.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  filter(survived == 1, sibsp > 0) %>%
  summarise(n.survivors = n())
## # A tibble: 191 x 8
## ï..pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 male 1 1 2 151.55 S
## 2 1 1 female 63 1 0 77.96 S
## 3 1 1 female 53 2 0 51.48 S
## 4 1 1 female 18 1 0 227.53 C
## 5 1 1 male 37 1 1 52.55 S
## 6 1 1 female 47 1 1 52.55 S
## 7 1 1 male 25 1 0 91.08 C
## 8 1 1 female 19 1 0 91.08 C
## 9 1 1 female 59 2 0 51.48 S
## 10 1 1 male 11 1 2 120.00 S
## # ... with 181 more rows
Task 12 Calculate the number of surviving passengers who had parents/children aboard
# Number of survivors with at least one parent/child aboard (parch > 0).
# The condition is an explicit comparison instead of the bare integer
# column, and the rows are counted rather than printed.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  filter(survived == 1, parch > 0) %>%
  summarise(n.survivors = n())
## # A tibble: 164 x 8
## ï..pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 male 1 1 2 151.55 S
## 2 1 1 female 50 0 1 247.52 C
## 3 1 1 male 37 1 1 52.55 S
## 4 1 1 female 47 1 1 52.55 S
## 5 1 1 female 22 0 1 55.00 S
## 6 1 1 male 36 0 1 512.33 C
## 7 1 1 female 58 0 1 512.33 C
## 8 1 1 male 11 1 2 120.00 S
## 9 1 1 female 14 1 2 120.00 S
## 10 1 1 male 36 1 2 120.00 S
## # ... with 154 more rows
Task 13 Calculate the avg. fare by class
# Mean fare for each passenger class.
# The class column is the first column of the data; it is selected by
# position and named `pclass` so the code does not depend on the
# mis-decoded header the import produced for that column.
# The earlier `filter(fare, ...)` step is removed: using `fare` as a
# predicate dropped every passenger with a fare of 0, inflating the means.
# NOTE(review): the rendered output below predates this fix; re-knit to
# refresh it.
csvtitanic %>%
  select(pclass = 1, fare) %>%
  group_by(pclass) %>%
  summarise(mean.fare = mean(fare, na.rm = TRUE))
## # A tibble: 3 x 2
## ï..pclass mean.fare
## <int> <dbl>
## 1 1 89.44785
## 2 2 21.64819
## 3 3 13.37973
Task 14 Calculate the frequency distribution of the number of parent/children.
# Tabulate how many passengers have each `parch` value, then show the counts
# as a single-column matrix.
freq.parchil <- table(csvtitanic[["parch"]])
cbind(freq.parchil)
## freq.parchil
## 0 1002
## 1 170
## 2 113
## 3 8
## 4 6
## 5 6
## 6 2
## 9 2
Task 15 Calculate the regular frequency distribution of the number of siblings/spouses of male passengers who had at least one sibling/spouse on board