1. Import the titanic540.csv dataset into R.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
# Read the Titanic passenger file from the course URL into a data frame.
titanic <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/titanic540.csv"
)
2. Convert the titanic540 data frame into a “tibble.”
# Wrap the imported data frame as a tibble so it prints compactly.
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr.
titanic_df <- as_tibble(titanic)
titanic_df
## # A tibble: 1,309 x 8
## pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 male 1 1 2 151.55 S
## 3 1 0 female 2 1 2 151.55 S
## 4 1 0 male 30 1 2 151.55 S
## 5 1 0 female 25 1 2 151.55 S
## 6 1 1 male 48 0 0 26.55 S
## 7 1 1 female 63 1 0 77.96 S
## 8 1 0 male 39 0 0 0.00 S
## 9 1 1 female 53 2 0 51.48 S
## 10 1 0 male 71 0 0 49.50 C
## # ... with 1,299 more rows
3. Calculate the number of surviving passengers.
# Count the passengers whose survival flag is 1, returning a single number
# rather than listing every matching row.
# NOTE: the output printed below predates this fix (it lists the rows);
# its "500 x 1" header implies n_survivors = 500. Re-run to refresh.
titanic_df %>%
  filter(survived == 1) %>%
  summarise(n_survivors = n())
## # A tibble: 500 x 1
## survived
## <int>
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## # ... with 490 more rows
4. Calculate the proportion of surviving passengers by sex.
# 339 females and 161 males survived.
# Report the share of each sex that survived, not just the raw counts.
# mean(survived == 1) is the fraction of rows in the group with survived == 1.
# NOTE: the count table printed below predates this fix; from those counts the
# proportions are female 339/466 (about 0.727) and male 161/843 (about 0.191).
titanic_df %>%
  group_by(sex) %>%
  summarise(
    n_passengers = n(),
    n_survived = sum(survived == 1),
    prop_survived = mean(survived == 1)
  )
##
## female male
## 0 127 682
## 1 339 161
5. Calculate the mean (average) age of surviving female passengers.
# Mean age of female passengers who survived.
# The earlier version kept all females (survivors or not) and never took the
# mean. na.rm = TRUE skips passengers whose age is missing.
# NOTE: the output printed below predates this fix; re-run to refresh.
titanic_df %>%
  filter(sex == "female", survived == 1) %>%
  summarise(mean_age = mean(age, na.rm = TRUE))
## # A tibble: 466 x 2
## sex age
## <fctr> <int>
## 1 female 29
## 2 female 2
## 3 female 25
## 4 female 63
## 5 female 53
## 6 female 18
## 7 female 24
## 8 female 26
## 9 female 50
## 10 female 32
## # ... with 456 more rows
6. Calculate the number of surviving passengers 10 years old or younger.
# Count survivors aged 10 or younger.
# The earlier version kept non-survivors too and listed rows instead of
# counting. filter() drops rows where age is NA, so they are not counted.
# NOTE: the output printed below predates this fix; re-run to refresh.
titanic_df %>%
  filter(survived == 1, age <= 10) %>%
  summarise(n_survivors = n())
## # A tibble: 86 x 2
## survived age
## <int> <int>
## 1 1 1
## 2 0 2
## 3 1 4
## 4 1 6
## 5 1 1
## 6 1 4
## 7 1 1
## 8 1 8
## 9 1 8
## 10 1 8
## # ... with 76 more rows
8. Calculate the proportion of surviving passengers by port of embarkation.
# Share of passengers from each port of embarkation who survived.
# The earlier version filtered to survivors and grouped them but never
# summarised, so it only printed the rows. Grouping all passengers and taking
# mean(survived == 1) gives the survival proportion per port.
# NOTE: the output printed below predates this fix; re-run to refresh.
titanic_df %>%
  group_by(embarked) %>%
  summarise(
    n_passengers = n(),
    n_survived = sum(survived == 1),
    prop_survived = mean(survived == 1)
  )
## # A tibble: 500 x 8
## # Groups: embarked [4]
## pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 male 1 1 2 151.55 S
## 3 1 1 male 48 0 0 26.55 S
## 4 1 1 female 63 1 0 77.96 S
## 5 1 1 female 53 2 0 51.48 S
## 6 1 1 female 18 1 0 227.53 C
## 7 1 1 female 24 0 0 69.30 C
## 8 1 1 female 26 0 0 78.85 S
## 9 1 1 male 80 0 0 30.00 S
## 10 1 1 female 50 0 1 247.52 C
## # ... with 490 more rows
9. Calculate the number of surviving female passengers over the age of 40 years old by port of embarkation.
# Keep only the columns this question needs. The earlier no-argument filter()
# around select() did nothing and has been dropped.
female_passangers <- select(titanic_df, sex, survived, age, embarked)
# Count surviving women older than 40 at each port.
# The earlier version omitted the survived == 1 condition and printed the
# grouped rows instead of a count per port.
# NOTE: the output printed below predates this fix; re-run to refresh.
female_passangers %>%
  filter(sex == "female", survived == 1, age > 40) %>%
  count(embarked)
## # A tibble: 78 x 4
## # Groups: embarked [3]
## sex survived age embarked
## <fctr> <int> <int> <fctr>
## 1 female 1 63 S
## 2 female 1 53 S
## 3 female 1 50 C
## 4 female 1 47 S
## 5 female 1 42 C
## 6 female 1 58 S
## 7 female 1 45 C
## 8 female 1 44 C
## 9 female 1 59 S
## 10 female 1 60 C
## # ... with 68 more rows
10. Calculate the mean (average) fare that passengers paid by port of embarkation.
# Keep only fare and port. The earlier no-argument filter() was a no-op.
fare_df <- select(titanic_df, fare, embarked)
# as_tibble() replaces the deprecated tbl_df(); fare_df is already a tibble,
# so this leaves the data unchanged.
cost <- as_tibble(fare_df)
# Average fare per port of embarkation, ignoring missing fares.
cost %>%
  group_by(embarked) %>%
  summarise(avg = mean(fare, na.rm = TRUE))
## # A tibble: 4 x 2
## embarked avg
## <fctr> <dbl>
## 1 80.00000
## 2 C 62.33719
## 3 Q 12.40935
## 4 S 27.41963
11. Calculate the number of surviving passengers who had any siblings/spouses aboard the Titanic.
# Count survivors who had at least one sibling or spouse aboard (sibsp > 0),
# returning a single number instead of listing the rows.
# NOTE: the output printed below predates this fix; its "191 x 2" header
# implies n_survivors = 191. Re-run to refresh.
titanic_df %>%
  filter(survived == 1, sibsp > 0) %>%
  summarise(n_survivors = n())
## # A tibble: 191 x 2
## survived sibsp
## <int> <int>
## 1 1 1
## 2 1 1
## 3 1 2
## 4 1 1
## 5 1 1
## 6 1 1
## 7 1 1
## 8 1 1
## 9 1 2
## 10 1 1
## # ... with 181 more rows
12. Calculate the number of surviving passengers who had any parents/children aboard the Titanic.
# Count survivors who had at least one parent or child aboard (parch > 0),
# returning a single number instead of listing the rows.
# NOTE: the output printed below predates this fix; its "164 x 2" header
# implies n_survivors = 164. Re-run to refresh.
titanic_df %>%
  filter(survived == 1, parch > 0) %>%
  summarise(n_survivors = n())
## # A tibble: 164 x 2
## survived parch
## <int> <int>
## 1 1 2
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## 7 1 1
## 8 1 2
## 9 1 2
## 10 1 2
## # ... with 154 more rows
13. Calculate the mean (average) fare that passengers paid by passenger class.
# Keep only fare and class. The earlier no-argument filter() was a no-op.
class_fare <- select(titanic_df, fare, pclass)
# as_tibble() replaces the deprecated tbl_df(); class_fare is already a
# tibble, so this leaves the data unchanged.
class_cost <- as_tibble(class_fare)
# Average fare per passenger class, ignoring missing fares.
class_cost %>%
  group_by(pclass) %>%
  summarise(avg = mean(fare, na.rm = TRUE))
## # A tibble: 3 x 2
## pclass avg
## <int> <dbl>
## 1 1 87.50935
## 2 2 21.17928
## 3 3 13.30414
14. Calculate a regular frequency distribution of the number of parents/children aboard the Titanic of female passengers.
# Keep the parents/children count and sex for every passenger.
freq_df <- titanic_df %>%
  select(parch, sex)
# Frequency distribution of parch for female passengers only.
# The earlier version tabulated both sexes.
# NOTE: the table printed below predates this fix; its "female" column holds
# the expected counts (293, 88, 69, 6, 4, 4, 1, 1 for parch 0, 1, 2, 3, 4, 5,
# 6, 9).
freq_df %>%
  filter(sex == "female") %>%
  count(parch)
## sex
## parch female male
## 0 293 709
## 1 88 82
## 2 69 44
## 3 6 2
## 4 4 2
## 5 4 2
## 6 1 1
## 9 1 1
15. Calculate a regular frequency distribution of the number of siblings/spouses of male passengers who had one or more siblings/spouses aboard the Titanic.
# Keep the siblings/spouses count and sex for every passenger.
male_df <- titanic_df %>%
  select(sibsp, sex)
# Frequency distribution of sibsp for male passengers who had at least one
# sibling or spouse aboard. The earlier version tabulated both sexes and
# included sibsp == 0.
# NOTE: the table printed below predates this fix; its "male" column from
# sibsp = 1 onward holds the expected counts (159, 23, 8, 15, 4, 5 for sibsp
# 1, 2, 3, 4, 5, 8).
male_df %>%
  filter(sex == "male", sibsp >= 1) %>%
  count(sibsp)
## sex
## sibsp female male
## 0 262 629
## 1 160 159
## 2 19 23
## 3 12 8
## 4 7 15
## 5 2 4
## 8 4 5