이 문서는 타이타닉 데이터 분석을 위한 R Markdown 문서이다.
한글을 테스트하기 위한 목적과 더불어 Markdown의 적용 가능성을 확인해 보고자 한다.
library(data.table)
library(ggplot2)
library(scales)
library(party)
# Load the passenger records and derive the columns used by the analysis below.
titanic <- read.csv("titanic.csv")
titanic.dt <- as.data.table(titanic)

# Store the outcome and `sex` as factors. Since R 4.0.0 read.csv() keeps
# strings as character, and party::ctree() does not accept character
# predictors; as.factor() is a no-op when the column already is a factor.
titanic.dt[, survived := as.factor(survived)]
titanic.dt[, sex := as.factor(sex)]

# Passengers younger than 15 are labelled "child"; everyone else, including
# rows with a missing age, is labelled "adult".
titanic.dt[
  ,
  isminor := factor(ifelse(!is.na(age) & age < 15, "child", "adult"))
]
# Marginal survival rates: share of passengers with survived == 1 within each
# level of a single grouping variable. Rows with a missing `survived` value
# count towards the denominator (.N) but not the numerator.

# Survival rate by passenger class.
titanic.dt[
  ,
  list(prob_as_class = sum(survived == 1, na.rm = TRUE) / .N),
  by = pclass
]

# Survival rate by sex.
titanic.dt[
  ,
  list(prob_as_sex = sum(survived == 1, na.rm = TRUE) / .N),
  by = sex
]

# Survival rate by age group (child / adult).
titanic.dt[
  ,
  list(prob_as_isminor = sum(survived == 1, na.rm = TRUE) / .N),
  by = isminor
]
# Survival probability for every (pclass, sex) combination: survivors divided
# by the number of passengers with a known outcome (survived is 0 or 1).
survived_pclass_sex <- titanic.dt[
  ,
  {
    n_survived <- sum(survived == 1, na.rm = TRUE)
    n_died <- sum(survived == 0, na.rm = TRUE)
    list(psurvived = n_survived / (n_survived + n_died))
  },
  by = list(pclass, sex)
]
# Survival probability for every (pclass, sex, isminor) combination: survivors
# divided by the number of passengers with a known outcome (survived is 0 or 1).
survived_pclass_sex_isminor <- titanic.dt[
  ,
  {
    n_survived <- sum(survived == 1, na.rm = TRUE)
    n_died <- sum(survived == 0, na.rm = TRUE)
    list(psurvived = n_survived / (n_survived + n_died))
  },
  by = list(pclass, sex, isminor)
]

# Combined "<sex>_<isminor>" label, e.g. "female_child". paste() is vectorised,
# so no row-wise apply() (which coerces the table to a matrix) is needed, and
# `:=` adds the column by reference instead of copying the table.
survived_pclass_sex_isminor[, sex_age := paste(sex, isminor, sep = "_")]
# Heat map of survival probability by passenger class (x) and sex/age group (y).
# A probability is one-sided, so a sequential white-to-blue scale is used
# rather than a diverging scale centred on 0. The limits are pinned to [0, 1]
# so that the colours mean the same thing regardless of the observed range.
ggplot(survived_pclass_sex_isminor, aes(x = pclass, y = sex_age)) +
  geom_tile(aes(fill = psurvived)) +
  scale_fill_gradient(
    name = "probability of survival",
    low = "white",
    high = muted("blue"),
    limits = c(0, 1)
  ) +
  xlab("grade of class") +
  ylab("sex and age")
# Fit a conditional inference tree that predicts survival from sex, age group
# and passenger class, then draw the fitted tree.
formu <- survived ~ sex + isminor + pclass
mdl <- ctree(formula = formu, data = titanic.dt)
plot(mdl)