Importar la base de datos

# file.choose()  # run interactively to locate the CSV and copy its path
# The file's first line is a stray title row ("titanic") and the real column
# names sit on the second line. Skipping one line lets read.csv() use the true
# header, so numeric columns are parsed as numbers and blank cells in them
# become NA instead of "".
# NOTE: the console output rendered further down in this document was produced
# before this change; re-knit to refresh it.
titanic <- read.csv("/Users/luisalfredo/Downloads/Titanic.csv", skip = 1)

Entender la base de datos

# Print a per-column summary of the imported data frame.
summary(titanic)
##    titanic               X                 X.1                X.2           
##  Length:892         Length:892         Length:892         Length:892        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      X.3                X.4                X.5                X.6           
##  Length:892         Length:892         Length:892         Length:892        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      X.7                X.8                X.9                X.10          
##  Length:892         Length:892         Length:892         Length:892        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character
# Display the data frame's structure: dimensions, column types, first values.
str(titanic)
## 'data.frame':    892 obs. of  12 variables:
##  $ titanic: chr  "PassengerId" "1" "2" "3" ...
##  $ X      : chr  "Survived" "0" "1" "1" ...
##  $ X.1    : chr  "Pclass" "3" "1" "3" ...
##  $ X.2    : chr  "Name" "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" ...
##  $ X.3    : chr  "Sex" "male" "female" "female" ...
##  $ X.4    : chr  "Age" "22" "38" "26" ...
##  $ X.5    : chr  "SibSp" "1" "1" "0" ...
##  $ X.6    : chr  "Parch" "0" "0" "0" ...
##  $ X.7    : chr  "Ticket" "A/5 21171" "PC 17599" "STON/O2. 3101282" ...
##  $ X.8    : chr  "Fare" "7.25" "71.2833" "7.925" ...
##  $ X.9    : chr  "Cabin" "" "C85" "" ...
##  $ X.10   : chr  "Embarked" "S" "C" "S" ...

Filtrar base de datos

# Name the columns after the twelve passenger fields in the file.
colnames(titanic) <- c(
  "PassengerId", "Survived", "Pclass", "Name", "Sex", "Age",
  "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked"
)

# If the CSV's real header line was imported as the first data row, drop it.
# Left in place it counts as a passenger and adds a bogus level ("Pclass",
# "Sex") to every factor built below.
if (nrow(titanic) > 0 &&
    identical(as.character(titanic$PassengerId[1]), "PassengerId")) {
  titanic <- titanic[-1, ]
}

# Build the modelling frame with ONLY the response and the three predictors.
# The raw Survived/Pclass/Sex columns are deliberately left out: a later
# `survived ~ .` formula would otherwise split on Survived itself (target
# leakage) and see each predictor twice.
# Age is converted to numeric so blank ages become NA and the tree can split
# on an age threshold. Comparing Survived as text works whether the column was
# read as character or as integer.
# NOTE: the console output rendered after this chunk was produced before this
# change; re-knit to refresh it.
Titanic <- data.frame(
  Age = as.numeric(as.character(titanic$Age)),
  survived = factor(
    ifelse(as.character(titanic$Survived) == "0", "Murio", "Sobrevive")
  ),
  pclass = factor(titanic$Pclass),
  sex = factor(titanic$Sex)
)
str(Titanic)
## 'data.frame':    892 obs. of  7 variables:
##  $ Pclass  : chr  "Pclass" "3" "1" "3" ...
##  $ Age     : chr  "Age" "22" "38" "26" ...
##  $ Sex     : chr  "Sex" "male" "female" "female" ...
##  $ Survived: chr  "Survived" "0" "1" "1" ...
##  $ survived: Factor w/ 2 levels "Murio","Sobrevive": 2 1 2 2 2 1 1 1 1 2 ...
##  $ pclass  : Factor w/ 4 levels "1","2","3","Pclass": 4 3 1 3 1 3 3 1 3 3 ...
##  $ sex     : Factor w/ 3 levels "female","male",..: 3 2 1 1 1 2 2 2 2 1 ...
# Total number of NA cells across the whole data frame.
sum(is.na(Titanic))
## [1] 0
# Count the NA values in each column. vapply() pins the result to exactly one
# integer per column, so the return type cannot silently change the way
# sapply()'s can (e.g. on a data frame with zero columns).
vapply(Titanic, function(x) sum(is.na(x)), integer(1))
##   Pclass      Age      Sex Survived survived   pclass      sex 
##        0        0        0        0        0        0        0
# Drop every row that has an NA in any column.
Titanic <- na.omit(Titanic)

Crear árbol de decisión

# install.packages("rpart")
library(rpart)

# Fit a tree that predicts `survived` from passenger class, sex and age.
# The predictors are listed explicitly instead of `survived ~ .`: with `.`,
# any raw Survived column still present in the data is used as a predictor and
# the tree collapses to a single trivial split on the outcome itself.
# NOTE(review): Age needs to be numeric for age-threshold splits; confirm its
# type where Titanic is built.
# NOTE: the tree printed below in this document was produced before this
# change; re-knit to refresh it.
arbol <- rpart(formula = survived ~ pclass + sex + Age, data = Titanic)
arbol
## n= 892 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 892 343 Murio (0.6154709 0.3845291)  
##   2) Survived=0 549   0 Murio (1.0000000 0.0000000) *
##   3) Survived=1,Survived 343   0 Sobrevive (0.0000000 1.0000000) *
# install.packages("rpart.plot")
library(rpart.plot)

# First figure: the fitted tree drawn with rpart.plot()'s default settings.
rpart.plot(arbol)

# Second figure: the same tree drawn with prp(), passing extra = 7 and the
# text "fraccion" as the prefix argument.
prp(
  arbol,
  extra = 7,
  prefix = "fraccion"
)

Conclusiones

  1. Las probabilidades más altas de sobrevivir en el Titanic corresponden a los niños varones menores de 9.5 años de 1° y 2° clase (100%) y a las mujeres de 1° y 2° clase (93%).
  2. Las probabilidades más bajas de sobrevivir en el Titanic corresponden a los hombres mayores de 9.5 años (18%) y a los varones menores de 9.5 años de 3° clase (38%).
LS0tCnRpdGxlOiAiVGl0YW5pYyIKYXV0aG9yOiAiTHVpcyBBbGZyZWRvIEdvbnphbGV6IENhbnRvIgpkYXRlOiAiMjAyNC0wMi0yMiIKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgoKIyMgSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CiNmaWxlLmNob29zZSgpCnRpdGFuaWMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9sdWlzYWxmcmVkby9Eb3dubG9hZHMvVGl0YW5pYy5jc3YiKQpgYGAKCiMjIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQpzdW1tYXJ5KHRpdGFuaWMpCnN0cih0aXRhbmljKQpgYGAKCiMjIEZpbHRyYXIgYmFzZSBkZSBkYXRvcwoKYGBge3J9CmNvbG5hbWVzKHRpdGFuaWMpIDwtIGMoIlBhc3NlbmdlcklkIiwgIlN1cnZpdmVkIiwgIlBjbGFzcyIsICJOYW1lIiwgIlNleCIsICJBZ2UiLCAiU2liU3AiLCAiUGFyY2giLCAiVGlja2V0IiwgIkZhcmUiLCAiQ2FiaW4iLCAiRW1iYXJrZWQiKQoKClRpdGFuaWMgPC0gdGl0YW5pY1ssIGMoIlBjbGFzcyIsIkFnZSIsIlNleCIsIlN1cnZpdmVkIildClRpdGFuaWMkc3Vydml2ZWQgPC0gYXMuZmFjdG9yKGlmZWxzZShUaXRhbmljJFN1cnZpdmVkPT0wLCAiTXVyaW8iLCAiU29icmV2aXZlIikpClRpdGFuaWMkcGNsYXNzIDwtIGFzLmZhY3RvcihUaXRhbmljJFBjbGFzcykKVGl0YW5pYyRzZXggPC0gIGFzLmZhY3RvcihUaXRhbmljJFNleCkKc3RyKFRpdGFuaWMpCgpzdW0oaXMubmEoVGl0YW5pYykpCnNhcHBseShUaXRhbmljLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKQoKVGl0YW5pYyA8LSBuYS5vbWl0KFRpdGFuaWMpCgpgYGAKCiMjIENyZWFyIGFyYm9sIGRlIGRlY2lzacOzbgoKYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygicnBhcnQiKQpsaWJyYXJ5KHJwYXJ0KQphcmJvbCA8LSBycGFydChmb3JtdWxhPXN1cnZpdmVkIH4gLiwgZGF0YSA9IFRpdGFuaWMpCmFyYm9sCgojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCnJwYXJ0LnBsb3QoYXJib2wpCnBycChhcmJvbCxleHRyYSA9IDcscHJlZml4ID0gImZyYWNjaW9uIikKYGBgCgojIyBDb25jbHVzaW9uZXMKCjEuIExhcyBtw6FzIGFsdGFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbmnDsW8gdmFyw7NuIG1lbm9yIGRlIDkuNSBhw7FvcyBkZSAxwrAgeSAywrAgY2xhc2UgKDEwMCUpLCB5IG11amVyZXMgZW4gMcKwIHkgMsKwIGNsYXNlICg5MyUpLiAgCjIuIExhcyBtw6FzIGJhamFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbG9zIGhvbWJyZXMgbWF5b3JlcyBkZSA5LjUgYcOxb3MgKDE4JSksIHkgbG9zIGhvbWJyZXMgbWVub3JlcyBkZSA5LjUgYcOxb3MgZW4gM8KwIGNsYXNlICgzOCUpCgoK