Load the data into R
# Download the Titanic passenger CSV and parse it into a data frame.
titanic <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/titanic540.csv"
)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
Create a tibble from the data
# Convert the raw data frame to a tibble for compact printing and dplyr verbs.
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr.
titanic_tibble <- as_tibble(titanic)
titanic_tibble
## # A tibble: 1,309 x 8
## pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 male 1 1 2 151.55 S
## 3 1 0 female 2 1 2 151.55 S
## 4 1 0 male 30 1 2 151.55 S
## 5 1 0 female 25 1 2 151.55 S
## 6 1 1 male 48 0 0 26.55 S
## 7 1 1 female 63 1 0 77.96 S
## 8 1 0 male 39 0 0 0.00 S
## 9 1 1 female 53 2 0 51.48 S
## 10 1 0 male 71 0 0 49.50 C
## # ... with 1,299 more rows
3 Calculate the proportion of surviving passengers
# Percentage of all passengers who survived (survived == 1).
prop.surviors <- select(titanic_tibble, survived)
prop.surviors2 <- filter(prop.surviors, survived == 1)
# Derive both counts from the data rather than hard-coding 500 / 1309, so the
# result stays correct if the input file changes.
prop.survivors3 <- nrow(prop.surviors2) / nrow(prop.surviors) * 100
prop.survivors3
## [1] 38.1971
4 Calculate the proportion of surviving passengers by sex (survived = 1)
# Proportion surviving within each sex. margin = 1 normalises each row (sex)
# to sum to 1, so column "1" is the survival rate for that sex. Without a
# margin, prop.table() divides by the grand total and yields joint proportions
# of all passengers instead.
prop.table(table(titanic_tibble$sex, titanic_tibble$survived), margin = 1)
##
## 0 1
## female 0.2725322 0.7274678
## male 0.8090154 0.1909846
5 Calculate mean age of surviving female passengers
# Surviving female passengers with a recorded age.
# The age condition is written as an explicit logical test: a bare integer
# column is not a valid filter() predicate in current dplyr, and truthiness
# coercion would also discard any passenger whose age is 0.
female.surv <- filter(
  titanic_tibble,
  sex == "female",
  survived == 1,
  !is.na(age)
)
female.surv
## # A tibble: 291 x 8
## pclass survived sex age sibsp parch fare embarked
## <int> <int> <fctr> <int> <int> <int> <dbl> <fctr>
## 1 1 1 female 29 0 0 211.34 S
## 2 1 1 female 63 1 0 77.96 S
## 3 1 1 female 53 2 0 51.48 S
## 4 1 1 female 18 1 0 227.53 C
## 5 1 1 female 24 0 0 69.30 C
## 6 1 1 female 26 0 0 78.85 S
## 7 1 1 female 50 0 1 247.52 C
## 8 1 1 female 32 0 0 76.29 C
## 9 1 1 female 47 1 1 52.55 S
## 10 1 1 female 42 0 0 227.53 C
## # ... with 281 more rows
# Mean age of the surviving female passengers, ignoring missing ages.
female.surv.mean.age <- mean(female.surv[["age"]], na.rm = TRUE)
female.surv.mean.age
## [1] 29.92096
6 Calculate the number of surviving passengers 10 years old and younger
# Keep only the columns needed for the age/survival count.
young.pass <- titanic_tibble %>%
  select(survived, age)
young.pass
## # A tibble: 1,309 x 2
## survived age
## <int> <int>
## 1 1 29
## 2 1 1
## 3 0 2
## 4 0 30
## 5 0 25
## 6 1 48
## 7 1 63
## 8 0 39
## 9 1 53
## 10 0 71
## # ... with 1,299 more rows
# Survivors aged 10 or younger.
young.pass2 <- young.pass %>%
  filter(age <= 10, survived == 1)
young.pass2
## # A tibble: 50 x 2
## survived age
## <int> <int>
## 1 1 1
## 2 1 4
## 3 1 6
## 4 1 1
## 5 1 4
## 6 1 1
## 7 1 8
## 8 1 8
## 9 1 8
## 10 1 1
## # ... with 40 more rows
# Number of rows, i.e. surviving passengers aged 10 or younger.
young.pass2 %>% count()
## # A tibble: 1 x 1
## n
## <int>
## 1 50
8 Calculate the proportion of surviving passengers by port of embarkation
9 Calculate the number of surviving female passengers over 40 years old by port of embarkation
10 Calculate the mean fare passengers paid, by port of embarkation
# Mean fare for each port of embarkation, ignoring missing fares.
mean.fare <- titanic_tibble %>%
  select(fare, embarked)
mean.fare2 <- summarise(
  group_by(mean.fare, embarked),
  mean.fare = mean(fare, na.rm = TRUE)
)
mean.fare2
## # A tibble: 4 x 2
## embarked mean.fare
## <fctr> <dbl>
## 1 80.00000
## 2 C 62.33719
## 3 Q 12.40935
## 4 S 27.41963
11 Calculate the number of surviving passengers who had a sibling or spouse aboard
# Count survivors who had at least one sibling or spouse aboard (sibsp >= 1).
sib.spouse <- titanic_tibble %>%
  select(survived, sibsp)
sib.spouse1 <- sib.spouse %>%
  filter(survived == 1, sibsp >= 1)
sib.spouse1 %>% count()
## # A tibble: 1 x 1
## n
## <int>
## 1 191
12 Calculate the number of surviving passengers who had any parents or children aboard
# Count survivors who had at least one parent or child aboard (parch >= 1).
par.child <- titanic_tibble %>%
  select(survived, parch)
par.child2 <- par.child %>%
  filter(survived == 1, parch >= 1)
par.child2 %>% count()
## # A tibble: 1 x 1
## n
## <int>
## 1 164
13 Calc mean fare by passenger class
# Mean fare for each passenger class, ignoring missing fares.
class.fare <- titanic_tibble %>%
  select(fare, pclass)
class.fare2 <- summarise(
  group_by(class.fare, pclass),
  pclass.fare = mean(fare, na.rm = TRUE)
)
class.fare2
## # A tibble: 3 x 2
## pclass pclass.fare
## <int> <dbl>
## 1 1 87.50935
## 2 2 21.17928
## 3 3 13.30414
14 Calculate the regular frequency distribution of the number of parents/children of female passengers
# Frequency distribution of the parch value among female passengers,
# shown as a one-column matrix.
female.parch <- titanic_tibble %>%
  select(sex, parch)
female.parch2 <- female.parch %>%
  filter(sex == "female")
female.parch3 <- table(female.parch2[["parch"]])
cbind(female.parch3)
## female.parch3
## 0 293
## 1 88
## 2 69
## 3 6
## 4 4
## 5 4
## 6 1
## 9 1
15 Calculate the regular frequency distribution of the number of siblings/spouses of male passengers who had at least one sibling or spouse aboard
# Frequency distribution of the sibsp value among male passengers with
# sibsp >= 1, shown as a one-column matrix.
male.sibsp <- titanic_tibble %>%
  select(sex, sibsp)
male.sibsp2 <- male.sibsp %>%
  filter(sex == "male", sibsp >= 1)
male.sibsp3 <- table(male.sibsp2[["sibsp"]])
cbind(male.sibsp3)
## male.sibsp3
## 1 159
## 2 23
## 3 8
## 4 15
## 5 4
## 8 5