library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#### Task 1 Import titanic540 csv file
# Read the Titanic passenger data straight from the course web server into a
# plain data frame.
# NOTE(review): the first column is printed further down as `ï..pclass`, which
# looks like a UTF-8 byte-order mark folded into the header -- confirm. Reading
# with `fileEncoding = "UTF-8-BOM"` would presumably yield `pclass`, but every
# later reference to `ï..pclass` would then have to be renamed as well.
titanic_url <- "http://www.personal.psu.edu/dlp/w540/titanic540.csv"
csvtitanic <- read.csv(file = titanic_url)

#### Task 2 Convert the titanic540 data frame to a “tibble”

# Convert the imported data frame to a tibble so that printing shows only the
# first rows together with each column's type. `as_tibble()` is used instead of
# `tbl_df()`, which is deprecated in current dplyr releases.
csvtitanic <- as_tibble(csvtitanic)
csvtitanic
## # A tibble: 1,309 x 8
##    ï..pclass survived    sex   age sibsp parch   fare embarked
##        <int>    <int> <fctr> <int> <int> <int>  <dbl>   <fctr>
##  1         1        1 female    29     0     0 211.34        S
##  2         1        1   male     1     1     2 151.55        S
##  3         1        0 female     2     1     2 151.55        S
##  4         1        0   male    30     1     2 151.55        S
##  5         1        0 female    25     1     2 151.55        S
##  6         1        1   male    48     0     0  26.55        S
##  7         1        1 female    63     1     0  77.96        S
##  8         1        0   male    39     0     0   0.00        S
##  9         1        1 female    53     2     0  51.48        S
## 10         1        0   male    71     0     0  49.50        C
## # ... with 1,299 more rows

#### Task 3 Calculate the proportion of surviving passengers

# Keep only the survival indicator. The column is selected by name, because
# `select(survived = 1)` picks the FIRST column (passenger class) and merely
# renames it to `survived`.
select.surviving <- csvtitanic %>%
  select(survived)

# Proportion of survivors: the share of rows whose `survived` flag equals 1
# (the flag appears to be coded 0/1 in the printed data). Missing flags are
# ignored.
select.surviving %>%
  summarise(prop.survived = mean(survived == 1, na.rm = TRUE))
## (output not shown -- re-run this chunk to regenerate it)

#### Task 4 Calculate the proportion of surviving passengers by sex

# Proportion of survivors within each sex. The data must be grouped by `sex`:
# `select(csvtitanic$sex)` hands the factor's integer codes to `select()` as
# column positions and therefore only returns the first two columns.
csvtitanic %>%
  group_by(sex) %>%
  summarise(prop.survived = mean(survived == 1, na.rm = TRUE))
## (output not shown -- re-run this chunk to regenerate it)

#### Task 5 Calculate the average age of surviving female passengers

# Average age of surviving female passengers.
# * `survived == 1` restricts the rows to survivors, as the task requires.
# * Every filter condition is an explicit comparison; a bare column such as
#   `age` is not a logical test and is rejected by current dplyr.
# * `na.rm` is spelled out correctly; a misspelled `ne.rm` is silently
#   swallowed by `mean()`'s `...`, so missing ages would not be removed.
csvtitanic %>%
  filter(survived == 1, sex == "female") %>%
  summarise(mean.age = mean(age, na.rm = TRUE))
## (output not shown -- re-run this chunk to regenerate it)

#### Task 6 Calculate the number of survivors less than 10 years of age

# Survivors younger than 10. Both conditions are explicit logical comparisons;
# `n()` does not belong inside `filter()` (it is a count, not a row test).
filter.survivers <- csvtitanic %>%
  filter(survived == 1, age < 10)

# The task asks for the NUMBER of such passengers, i.e. the row count.
nrow(filter.survivers)
## [1] 50

#### Task 7 Calculate the min, max, and avg. age of surviving passengers 10 years old or older

# Minimum, maximum and mean age of survivors aged 10 or older.
# `age >= 10` is used because the task says "10 years old or older"; a strict
# `age > 10` would drop the 10-year-olds. `survived == 1` makes the survivor
# test an explicit logical comparison.
csvtitanic %>%
  filter(age >= 10, survived == 1) %>%
  summarise(
    min.age = min(age, na.rm = TRUE),
    max.age = max(age, na.rm = TRUE),
    avg.age = mean(age, na.rm = TRUE)
  )
## (output not shown -- re-run this chunk to regenerate it)

#### Task 8 Calculate the proportion of passengers by port of embarkation

# Share of all passengers that boarded at each port. `group_by()` on its own
# only tags the rows and computes nothing, so the passengers are counted per
# port and each count is divided by the overall total.
# Rows with an empty `embarked` value form their own category.
csvtitanic %>%
  count(embarked) %>%
  mutate(prop = n / sum(n))
## (output not shown -- re-run this chunk to regenerate it)

#### Task 9 Calculate the number of surviving female passengers over 40 years of age by port

# Number of surviving female passengers older than 40, broken down by port of
# embarkation. All three restrictions (survivor, female, age over 40) are
# applied before counting per port; computing a mean age does not answer the
# task.
csvtitanic %>%
  filter(survived == 1, sex == "female", age > 40) %>%
  count(embarked)
## (output not shown -- re-run this chunk to regenerate it)

#### Task 10 Calculate the avg. fare by port

# Mean ticket fare for each port of embarkation; missing fares are ignored.
fare_by_port <- group_by(csvtitanic, embarked)
summarise(fare_by_port, mean.fare = mean(fare, na.rm = TRUE))
## # A tibble: 4 x 2
##   embarked mean.fare
##     <fctr>     <dbl>
## 1           80.00000
## 2        C  62.33719
## 3        Q  12.40935
## 4        S  27.41963

#### Task 11 Calculate the number of surviving passengers who had siblings

# Number of survivors travelling with at least one sibling/spouse.
# `sibsp > 0` states the condition as a logical comparison (a bare numeric
# column is not a valid filter test in current dplyr), and `nrow()` reports the
# count the task asks for instead of printing the matching rows.
csvtitanic %>%
  filter(survived == 1, sibsp > 0) %>%
  nrow()
## [1] 191

#### Task 12 Calculate the number of surviving passengers who had parents/children aboard

# Number of survivors travelling with at least one parent/child.
# `parch > 0` states the condition as a logical comparison (a bare numeric
# column is not a valid filter test in current dplyr), and `nrow()` reports the
# count the task asks for instead of printing the matching rows.
csvtitanic %>%
  filter(survived == 1, parch > 0) %>%
  nrow()
## [1] 164

#### Task 13 Calculate the avg. fare by class

# Mean ticket fare per passenger class. No row filter is applied:
# `filter(fare, ...)` treats the fare as a truth value and so drops every
# passenger whose fare is 0, which inflates the class averages. Missing fares
# are already handled by `na.rm = TRUE`.
# The class column is referenced as `ï..pclass` because that is the name the
# import step produces for the first column.
csvtitanic %>%
  group_by(ï..pclass) %>%
  summarise(mean.fare = mean(fare, na.rm = TRUE))
## (output not shown -- re-run this chunk to regenerate it)

#### Task 14 Calculate the frequency distribution of the number of parents/children.

# Tabulate how many passengers fall under each value of `parch`
# (parents/children aboard) and display the counts as a one-column matrix.
freq.parchil <- table(csvtitanic[["parch"]])
cbind(freq.parchil)
##   freq.parchil
## 0         1002
## 1          170
## 2          113
## 3            8
## 4            6
## 5            6
## 6            2
## 9            2

#### Task 15 Calculate the regular freq. distribution of the number of siblings/spouses of male passengers who had at least one sibling/spouse on board