Load the rpart, rattle, rpart.plot, RColorBrewer and dplyr packages
library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(dplyr)
Split the data: roughly 90% for training and 10% for testing
# Read the passenger data and split it into two disjoint sets of rows.
# Call set.seed() beforehand if a reproducible split is needed.
data <- read.csv("titanic.csv")
# Draw 800 distinct row indices for training (sample() is without replacement
# by default), sized from the data actually read rather than a fixed 891.
train_ind <- sample(nrow(data), 800)
train <- data[train_ind, ]
# The test set is every row NOT used for training. Sampling the test indices
# independently would let training rows leak into the test set and inflate
# the measured accuracy.
test_ind <- setdiff(seq_len(nrow(data)), train_ind)
test <- data[test_ind, ]
Select the variables to use from the data
# Reduce both data sets to the same four columns: Survived, Pclass, Sex, Age.
train <- select(train, Survived, Pclass, Sex, Age)
test <- select(test, Survived, Pclass, Sex, Age)
Build a tree model: tree
# Fit a classification tree for Survived using every other column of train.
tree <- rpart(
  formula = Survived ~ .,
  data = train,
  method = "class"
)
Predict the classes of the test set: pred
# Predicted class label for each row of the test set.
pred <- predict(object = tree, newdata = test, type = "class")
Construct the confusion matrix: conf
# Cross-tabulate actual outcomes (rows) against predicted classes (columns).
# Reuses the predictions already stored in pred instead of running predict()
# a second time; deparse.level = 0 keeps the table dimensions unlabelled, so
# the printed layout is the same as before.
conf <- table(test$Survived, pred, deparse.level = 0)
conf
##
## 0 1
## 0 48 9
## 1 7 27
Print out the accuracy
# Accuracy: cases on the diagonal of conf (actual == predicted) over all cases.
sum(diag(conf)) / sum(conf)
## [1] 0.8241758
Prune the tree: pruned
# Prune the fitted tree with complexity parameter 0.05, then plot the result.
pruned <- prune(tree = tree, cp = 0.05)
fancyRpartPlot(pruned)