1. Import the titanic540.csv dataset into R.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
titanic <- 
  read.csv("http://www.personal.psu.edu/dlp/w540/titanic540.csv")

2. Convert the titanic540.csv dataset into data frame as a “tibble.”

titanic_df <- tbl_df(titanic)
titanic_df
## # A tibble: 1,309 x 8
##    pclass survived    sex   age sibsp parch   fare embarked
##     <int>    <int> <fctr> <int> <int> <int>  <dbl>   <fctr>
##  1      1        1 female    29     0     0 211.34        S
##  2      1        1   male     1     1     2 151.55        S
##  3      1        0 female     2     1     2 151.55        S
##  4      1        0   male    30     1     2 151.55        S
##  5      1        0 female    25     1     2 151.55        S
##  6      1        1   male    48     0     0  26.55        S
##  7      1        1 female    63     1     0  77.96        S
##  8      1        0   male    39     0     0   0.00        S
##  9      1        1 female    53     2     0  51.48        S
## 10      1        0   male    71     0     0  49.50        C
## # ... with 1,299 more rows

3. Calculate the number of surviving passengers.

titanic_df%>%
  select(survived)%>%
  filter(survived == 1)
## # A tibble: 500 x 1
##    survived
##       <int>
##  1        1
##  2        1
##  3        1
##  4        1
##  5        1
##  6        1
##  7        1
##  8        1
##  9        1
## 10        1
## # ... with 490 more rows

4. Calculate the proportion of surviving passengers by sex.

#339 females and 161 males survived
table(titanic_df$survived, titanic_df$sex)
##    
##     female male
##   0    127  682
##   1    339  161

5. Calculate the mean (average) age of surviving female passengers

titanic_df%>%
  select(sex, age)%>%
  filter(sex == "female")
## # A tibble: 466 x 2
##       sex   age
##    <fctr> <int>
##  1 female    29
##  2 female     2
##  3 female    25
##  4 female    63
##  5 female    53
##  6 female    18
##  7 female    24
##  8 female    26
##  9 female    50
## 10 female    32
## # ... with 456 more rows

6. Calculate the number of surviving passengers 10 years old or younger

titanic_df%>%
  select(survived, age)%>%
  filter(age<= 10)
## # A tibble: 86 x 2
##    survived   age
##       <int> <int>
##  1        1     1
##  2        0     2
##  3        1     4
##  4        1     6
##  5        1     1
##  6        1     4
##  7        1     1
##  8        1     8
##  9        1     8
## 10        1     8
## # ... with 76 more rows

7.Calculate the maximum, minimum, and median age of surviving passengers 10 years old or older

child_df <-titanic_df %>%
  filter(age>= 10)
child_df%>%
  summarise(min.age = min(age),
          max.age = max(age),
          mean.age = mean(age))
## # A tibble: 1 x 3
##   min.age max.age mean.age
##     <dbl>   <dbl>    <dbl>
## 1      10      80 32.09751

8. Calculate the proportion of surviving passengers by port of embarkation.

titanic_df%>%
  filter(survived==1)%>%
  group_by(embarked)
## # A tibble: 500 x 8
## # Groups:   embarked [4]
##    pclass survived    sex   age sibsp parch   fare embarked
##     <int>    <int> <fctr> <int> <int> <int>  <dbl>   <fctr>
##  1      1        1 female    29     0     0 211.34        S
##  2      1        1   male     1     1     2 151.55        S
##  3      1        1   male    48     0     0  26.55        S
##  4      1        1 female    63     1     0  77.96        S
##  5      1        1 female    53     2     0  51.48        S
##  6      1        1 female    18     1     0 227.53        C
##  7      1        1 female    24     0     0  69.30        C
##  8      1        1 female    26     0     0  78.85        S
##  9      1        1   male    80     0     0  30.00        S
## 10      1        1 female    50     0     1 247.52        C
## # ... with 490 more rows

9.Calculate the number of surviving female passengers over the age of 40 years old by port of embarkation

female_passangers <- filter(select(titanic_df, sex, survived, age, embarked))
filter (female_passangers, age>40, sex=="female")%>%
  group_by(embarked)
## # A tibble: 78 x 4
## # Groups:   embarked [3]
##       sex survived   age embarked
##    <fctr>    <int> <int>   <fctr>
##  1 female        1    63        S
##  2 female        1    53        S
##  3 female        1    50        C
##  4 female        1    47        S
##  5 female        1    42        C
##  6 female        1    58        S
##  7 female        1    45        C
##  8 female        1    44        C
##  9 female        1    59        S
## 10 female        1    60        C
## # ... with 68 more rows

10. Calculate the mean (average) fare that passengers paid by port of embarkation.

fare_df <- filter(select(titanic_df, fare, embarked))
cost <- tbl_df(fare_df)
cost%>%
  group_by(embarked)%>%
  summarise(avg = mean(fare, na.rm = TRUE))
## # A tibble: 4 x 2
##   embarked      avg
##     <fctr>    <dbl>
## 1          80.00000
## 2        C 62.33719
## 3        Q 12.40935
## 4        S 27.41963

11. Calculate number of surviving passengers who had any siblings/spouses aboard the Titanic.

titanic_df%>%
  select(survived, sibsp)%>%
  filter(survived == 1)%>%
  filter(sibsp > 0)
## # A tibble: 191 x 2
##    survived sibsp
##       <int> <int>
##  1        1     1
##  2        1     1
##  3        1     2
##  4        1     1
##  5        1     1
##  6        1     1
##  7        1     1
##  8        1     1
##  9        1     2
## 10        1     1
## # ... with 181 more rows

12. Calculate number of surviving passengers who had any parents/children aboard the Titanic.

titanic_df%>%
  select(survived, parch)%>%
  filter(survived == 1)%>%
  filter(parch > 0)
## # A tibble: 164 x 2
##    survived parch
##       <int> <int>
##  1        1     2
##  2        1     1
##  3        1     1
##  4        1     1
##  5        1     1
##  6        1     1
##  7        1     1
##  8        1     2
##  9        1     2
## 10        1     2
## # ... with 154 more rows

13.Calculate the mean (average) fare that passengers paid by passenger class.

class_fare <- filter(select(titanic_df, fare, pclass))
class_cost <- tbl_df(class_fare)
class_cost%>%
  group_by(pclass)%>%
  summarise(avg = mean(fare, na.rm = TRUE))
## # A tibble: 3 x 2
##   pclass      avg
##    <int>    <dbl>
## 1      1 87.50935
## 2      2 21.17928
## 3      3 13.30414

14.Calculate a regular frequency distribution of the number of parents/children aboard the Titanic of female passengers.

freq_df <- titanic_df%>%
  select(parch, sex)
table(freq_df)
##      sex
## parch female male
##     0    293  709
##     1     88   82
##     2     69   44
##     3      6    2
##     4      4    2
##     5      4    2
##     6      1    1
##     9      1    1

15. Calculate a regular frequency distribution of the number of siblings/spouses of male passengers who had at least one or more siblings/spouses aboard the Titanic

male_df <- titanic_df%>%
  select(sibsp, sex)
table(male_df)
##      sex
## sibsp female male
##     0    262  629
##     1    160  159
##     2     19   23
##     3     12    8
##     4      7   15
##     5      2    4
##     8      4    5