Insert data into R

# Read the Titanic passenger data from a remote CSV into a data frame.
# The file is fetched over HTTP, so this line needs network access.
titanic <- read.csv("http://www.personal.psu.edu/dlp/w540/titanic540.csv")
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)

creating tibble for data

# Wrap the raw data frame as a tibble for compact printing. as_tibble()
# replaces tbl_df(), which dplyr has deprecated.
titanic_tibble <- as_tibble(titanic)
titanic_tibble
## # A tibble: 1,309 x 8
##    pclass survived    sex   age sibsp parch   fare embarked
##     <int>    <int> <fctr> <int> <int> <int>  <dbl>   <fctr>
##  1      1        1 female    29     0     0 211.34        S
##  2      1        1   male     1     1     2 151.55        S
##  3      1        0 female     2     1     2 151.55        S
##  4      1        0   male    30     1     2 151.55        S
##  5      1        0 female    25     1     2 151.55        S
##  6      1        1   male    48     0     0  26.55        S
##  7      1        1 female    63     1     0  77.96        S
##  8      1        0   male    39     0     0   0.00        S
##  9      1        1 female    53     2     0  51.48        S
## 10      1        0   male    71     0     0  49.50        C
## # ... with 1,299 more rows

3 calculate proportion of surviving passengers

# Percentage of all passengers who survived (survived == 1). The numerator and
# denominator are taken from the data rather than typed in as 500 and 1309, so
# the figure stays correct if the input file changes.
prop.surviors <- select(titanic_tibble, survived)
prop.surviors2 <- filter(prop.surviors, survived == 1)
prop.survivors3 <- nrow(prop.surviors2) / nrow(prop.surviors) * 100
prop.survivors3
## [1] 38.1971

4 calculate proportion of surviving pass. by sex (survived pass = 1)

# Survival rate within each sex: margin = 1 makes every row sum to 1, so the
# "1" column is the share of that sex who survived. Without a margin,
# prop.table() divides by the grand total and reports joint proportions.
prop.table(table(titanic_tibble$sex, titanic_tibble$survived), margin = 1)
##         
##                  0         1
##   female 0.2725322 0.7274678
##   male   0.8090154 0.1909846

5 Calculate mean age of surviving female passengers

# Surviving female passengers with a usable age. The original passed the bare
# `age` column as a filter condition; current dplyr rejects non-logical
# conditions, so the same test is spelled out here: keep rows whose age is
# neither missing nor 0.
# NOTE(review): excluding age 0 mirrors the original coercion of `age` to
# logical and may be unintended -- confirm whether those rows belong in the
# mean computed from this table.
female.surv <- filter(titanic_tibble, sex == "female", survived == 1,
                      !is.na(age), age != 0)
female.surv
## # A tibble: 291 x 8
##    pclass survived    sex   age sibsp parch   fare embarked
##     <int>    <int> <fctr> <int> <int> <int>  <dbl>   <fctr>
##  1      1        1 female    29     0     0 211.34        S
##  2      1        1 female    63     1     0  77.96        S
##  3      1        1 female    53     2     0  51.48        S
##  4      1        1 female    18     1     0 227.53        C
##  5      1        1 female    24     0     0  69.30        C
##  6      1        1 female    26     0     0  78.85        S
##  7      1        1 female    50     0     1 247.52        C
##  8      1        1 female    32     0     0  76.29        C
##  9      1        1 female    47     1     1  52.55        S
## 10      1        1 female    42     0     0 227.53        C
## # ... with 281 more rows
# Average age over the retained rows; na.rm = TRUE skips any missing ages.
female.surv.mean.age <- female.surv[["age"]] %>% mean(na.rm = TRUE)
female.surv.mean.age
## [1] 29.92096

6 calculate # of surviving passengers 10 years old & younger

# Keep only the survival flag and age columns, then print the result.
young.pass <- titanic_tibble %>%
  select(survived, age)
young.pass
## # A tibble: 1,309 x 2
##    survived   age
##       <int> <int>
##  1        1    29
##  2        1     1
##  3        0     2
##  4        0    30
##  5        0    25
##  6        1    48
##  7        1    63
##  8        0    39
##  9        1    53
## 10        0    71
## # ... with 1,299 more rows
# Rows for survivors (survived == 1) whose age is 10 or less.
young.pass2 <- young.pass %>%
  filter(age <= 10 & survived == 1)
young.pass2
## # A tibble: 50 x 2
##    survived   age
##       <int> <int>
##  1        1     1
##  2        1     4
##  3        1     6
##  4        1     1
##  5        1     4
##  6        1     1
##  7        1     8
##  8        1     8
##  9        1     8
## 10        1     1
## # ... with 40 more rows
# Number of rows left after the age/survival filter.
young.pass2 %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    50

7 calc max, min, and median age of surviving pass. 10 years & older

# Age and survival flag for survivors whose age is 10 or more.
olderten <- titanic_tibble %>%
  select(age, survived)
olderten2 <- olderten %>%
  filter(age >= 10 & survived == 1)
olderten2
## # A tibble: 377 x 2
##      age survived
##    <int>    <int>
##  1    29        1
##  2    48        1
##  3    63        1
##  4    53        1
##  5    18        1
##  6    24        1
##  7    26        1
##  8    80        1
##  9    50        1
## 10    32        1
## # ... with 367 more rows
# Summarise the ages of survivors aged 10 or over. The task asks for the
# median, which the original left out (it reported only the mean); median.age
# is added and mean.age is kept so existing uses of olderten3 still work.
# NOTE: the printed output that follows in this document predates median.age
# and needs regenerating.
olderten3 <- olderten2 %>%
  summarise(min.age = min(age),
            max.age = max(age),
            median.age = median(age),
            mean.age = mean(age))
olderten3
## # A tibble: 1 x 3
##   min.age max.age mean.age
##     <dbl>   <dbl>    <dbl>
## 1      11      80 32.26525

8 Calc. proportion of surviving pass. by port of embark.

9 Calc. # of surviving female pass. over 40 y/o by port of embarkation

10 Calc mean fare pass. pd by port of embark.

# Average fare for each value of `embarked`, ignoring missing fares.
mean.fare <- titanic_tibble %>% select(fare, embarked)
mean.fare2 <- summarise(group_by(mean.fare, embarked),
                        mean.fare = mean(fare, na.rm = TRUE))
mean.fare2
## # A tibble: 4 x 2
##   embarked mean.fare
##     <fctr>     <dbl>
## 1           80.00000
## 2        C  62.33719
## 3        Q  12.40935
## 4        S  27.41963

11 Calc # of surviving pass. who had sibling/spouse aboard

# Count survivors (survived == 1) with sibsp of 1 or more.
sib.spouse <- titanic_tibble %>% select(survived, sibsp)
sib.spouse1 <- sib.spouse %>% filter(survived == 1 & sibsp >= 1)
sib.spouse1 %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1   191

12 Calc. # of surviving pass. who had any parents/children aboard

# Count survivors (survived == 1) with parch of 1 or more.
par.child <- titanic_tibble %>% select(survived, parch)
par.child2 <- par.child %>% filter(survived == 1 & parch >= 1)
par.child2 %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1   164

13 Calc mean fare by passenger class

# Average fare for each passenger class, ignoring missing fares.
class.fare <- titanic_tibble %>% select(fare, pclass)
class.fare2 <- summarise(group_by(class.fare, pclass),
                         pclass.fare = mean(fare, na.rm = TRUE))
class.fare2
## # A tibble: 3 x 2
##   pclass pclass.fare
##    <int>       <dbl>
## 1      1    87.50935
## 2      2    21.17928
## 3      3    13.30414

14 Calc. regular freq. dist. of # of parents/child of female pass.

# Frequency table of parch values among female passengers, displayed as a
# single column. cbind() is called directly on the variable so the column
# keeps the label "female.parch3".
female.parch <- titanic_tibble %>% select(sex, parch)
female.parch2 <- female.parch %>% filter(sex == "female")
female.parch3 <- table(female.parch2[["parch"]])
cbind(female.parch3)
##   female.parch3
## 0           293
## 1            88
## 2            69
## 3             6
## 4             4
## 5             4
## 6             1
## 9             1

15 Calc reg freq. dist. of # of siblings/spouses of male pass. who had at least 1 or more sibling/spouse

# Frequency table of sibsp values among male passengers with sibsp of 1 or
# more, displayed as a single column. cbind() is called directly on the
# variable so the column keeps the label "male.sibsp3".
male.sibsp <- titanic_tibble %>% select(sex, sibsp)
male.sibsp2 <- male.sibsp %>% filter(sex == "male" & sibsp >= 1)
male.sibsp3 <- table(male.sibsp2[["sibsp"]])
cbind(male.sibsp3)
##   male.sibsp3
## 1         159
## 2          23
## 3           8
## 4          15
## 5           4
## 8           5