Importar la base de datos
#file.choose()
titanic <- read.csv("/Users/luisalfredo/Downloads/Titanic.csv")
Entender la base de datos
summary(titanic)
## titanic X X.1 X.2
## Length:892 Length:892 Length:892 Length:892
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.3 X.4 X.5 X.6
## Length:892 Length:892 Length:892 Length:892
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.7 X.8 X.9 X.10
## Length:892 Length:892 Length:892 Length:892
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
str(titanic)
## 'data.frame': 892 obs. of 12 variables:
## $ titanic: chr "PassengerId" "1" "2" "3" ...
## $ X : chr "Survived" "0" "1" "1" ...
## $ X.1 : chr "Pclass" "3" "1" "3" ...
## $ X.2 : chr "Name" "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" ...
## $ X.3 : chr "Sex" "male" "female" "female" ...
## $ X.4 : chr "Age" "22" "38" "26" ...
## $ X.5 : chr "SibSp" "1" "1" "0" ...
## $ X.6 : chr "Parch" "0" "0" "0" ...
## $ X.7 : chr "Ticket" "A/5 21171" "PC 17599" "STON/O2. 3101282" ...
## $ X.8 : chr "Fare" "7.25" "71.2833" "7.925" ...
## $ X.9 : chr "Cabin" "" "C85" "" ...
## $ X.10 : chr "Embarked" "S" "C" "S" ...
Filtrar base de datos
colnames(titanic) <- c("PassengerId", "Survived", "Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked")
Titanic <- titanic[, c("Pclass","Age","Sex","Survived")]
Titanic$survived <- as.factor(ifelse(Titanic$Survived==0, "Murio", "Sobrevive"))
Titanic$pclass <- as.factor(Titanic$Pclass)
Titanic$sex <- as.factor(Titanic$Sex)
str(Titanic)
## 'data.frame': 892 obs. of 7 variables:
## $ Pclass : chr "Pclass" "3" "1" "3" ...
## $ Age : chr "Age" "22" "38" "26" ...
## $ Sex : chr "Sex" "male" "female" "female" ...
## $ Survived: chr "Survived" "0" "1" "1" ...
## $ survived: Factor w/ 2 levels "Murio","Sobrevive": 2 1 2 2 2 1 1 1 1 2 ...
## $ pclass : Factor w/ 4 levels "1","2","3","Pclass": 4 3 1 3 1 3 3 1 3 3 ...
## $ sex : Factor w/ 3 levels "female","male",..: 3 2 1 1 1 2 2 2 2 1 ...
sum(is.na(Titanic))
## [1] 0
sapply(Titanic, function(x) sum(is.na(x)))
## Pclass Age Sex Survived survived pclass sex
## 0 0 0 0 0 0 0
Titanic <- na.omit(Titanic)
Crear arbol de decisión
# install.packages("rpart")
library(rpart)
arbol <- rpart(formula=survived ~ ., data = Titanic)
arbol
## n= 892
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 892 343 Murio (0.6154709 0.3845291)
## 2) Survived=0 549 0 Murio (1.0000000 0.0000000) *
## 3) Survived=1,Survived 343 0 Sobrevive (0.0000000 1.0000000) *
# install.packages("rpart.plot")
library(rpart.plot)
rpart.plot(arbol)

prp(arbol,extra = 7,prefix = "fraccion")

Conclusiones
- Las más altas probabilidades de sobrevivir en el Titanic son niño
varón menor de 9.5 años de 1° y 2° clase (100%), y mujeres en 1° y 2°
clase (93%).
- Las más bajas probabilidades de sobrevivir en el Titanic son los
hombres mayores de 9.5 años (18%), y los hombres menores de 9.5 años en
3° clase (38%)
LS0tCnRpdGxlOiAiVGl0YW5pYyIKYXV0aG9yOiAiTHVpcyBBbGZyZWRvIEdvbnphbGV6IENhbnRvIgpkYXRlOiAiMjAyNC0wMi0yMiIKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgoKIyMgSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CiNmaWxlLmNob29zZSgpCnRpdGFuaWMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9sdWlzYWxmcmVkby9Eb3dubG9hZHMvVGl0YW5pYy5jc3YiKQpgYGAKCiMjIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQpzdW1tYXJ5KHRpdGFuaWMpCnN0cih0aXRhbmljKQpgYGAKCiMjIEZpbHRyYXIgYmFzZSBkZSBkYXRvcwoKYGBge3J9CmNvbG5hbWVzKHRpdGFuaWMpIDwtIGMoIlBhc3NlbmdlcklkIiwgIlN1cnZpdmVkIiwgIlBjbGFzcyIsICJOYW1lIiwgIlNleCIsICJBZ2UiLCAiU2liU3AiLCAiUGFyY2giLCAiVGlja2V0IiwgIkZhcmUiLCAiQ2FiaW4iLCAiRW1iYXJrZWQiKQoKClRpdGFuaWMgPC0gdGl0YW5pY1ssIGMoIlBjbGFzcyIsIkFnZSIsIlNleCIsIlN1cnZpdmVkIildClRpdGFuaWMkc3Vydml2ZWQgPC0gYXMuZmFjdG9yKGlmZWxzZShUaXRhbmljJFN1cnZpdmVkPT0wLCAiTXVyaW8iLCAiU29icmV2aXZlIikpClRpdGFuaWMkcGNsYXNzIDwtIGFzLmZhY3RvcihUaXRhbmljJFBjbGFzcykKVGl0YW5pYyRzZXggPC0gIGFzLmZhY3RvcihUaXRhbmljJFNleCkKc3RyKFRpdGFuaWMpCgpzdW0oaXMubmEoVGl0YW5pYykpCnNhcHBseShUaXRhbmljLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKQoKVGl0YW5pYyA8LSBuYS5vbWl0KFRpdGFuaWMpCgpgYGAKCiMjIENyZWFyIGFyYm9sIGRlIGRlY2lzacOzbgoKYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygicnBhcnQiKQpsaWJyYXJ5KHJwYXJ0KQphcmJvbCA8LSBycGFydChmb3JtdWxhPXN1cnZpdmVkIH4gLiwgZGF0YSA9IFRpdGFuaWMpCmFyYm9sCgojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCnJwYXJ0LnBsb3QoYXJib2wpCnBycChhcmJvbCxleHRyYSA9IDcscHJlZml4ID0gImZyYWNjaW9uIikKYGBgCgojIyBDb25jbHVzaW9uZXMKCjEuIExhcyBtw6FzIGFsdGFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbmnDsW8gdmFyw7NuIG1lbm9yIGRlIDkuNSBhw7FvcyBkZSAxwrAgeSAywrAgY2xhc2UgKDEwMCUpLCB5IG11amVyZXMgZW4gMcKwIHkgMsKwIGNsYXNlICg5MyUpLiAgCjIuIExhcyBtw6FzIGJhamFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbG9zIGhvbWJyZXMgbWF5b3JlcyBkZSA5LjUgYcOxb3MgKDE4JSksIHkgbG9zIGhvbWJyZXMgbWVub3JlcyBkZSA5LjUgYcOxb3MgZW4gM8KwIGNsYXNlICgzOCUpCgoK