
# Install the packages (only needed once) and load the libraries.
#install.packages("rpart")
#install.packages("rpart.plot")
library(rpart)
library(rpart.plot)
# Import the data set.
# The accent in the "Concentración" folder name is written as an explicit
# escape (o followed by U+0301, combining acute accent) so the path is not
# corrupted when this file is re-encoded; the previous literal had been
# mangled into "ConcentracioĢn", which does not exist on disk.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running on another computer.
titanic <- read.csv("/Users/pablosancho/Desktop/Concentracio\u0301n/Modulo 2 Concentracion (R)/titanic.csv")
## Understand the data set: per-column summary statistics and NA counts.
summary(titanic)
## pclass survived name sex
## Min. :1.000 Min. :0.000 Length:1310 Length:1310
## 1st Qu.:2.000 1st Qu.:0.000 Class :character Class :character
## Median :3.000 Median :0.000 Mode :character Mode :character
## Mean :2.295 Mean :0.382
## 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :3.000 Max. :1.000
## NA's :1 NA's :1
## age sibsp parch ticket
## Min. : 0.1667 Min. :0.0000 Min. :0.000 Length:1310
## 1st Qu.:21.0000 1st Qu.:0.0000 1st Qu.:0.000 Class :character
## Median :28.0000 Median :0.0000 Median :0.000 Mode :character
## Mean :29.8811 Mean :0.4989 Mean :0.385
## 3rd Qu.:39.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :80.0000 Max. :8.0000 Max. :9.000
## NA's :264 NA's :1 NA's :1
## fare cabin embarked boat
## Min. : 0.000 Length:1310 Length:1310 Length:1310
## 1st Qu.: 7.896 Class :character Class :character Class :character
## Median : 14.454 Mode :character Mode :character Mode :character
## Mean : 33.295
## 3rd Qu.: 31.275
## Max. :512.329
## NA's :2
## body home.dest
## Min. : 1.0 Length:1310
## 1st Qu.: 72.0 Class :character
## Median :155.0 Mode :character
## Mean :160.8
## 3rd Qu.:256.0
## Max. :328.0
## NA's :1189
# Show the structure of the raw data frame: dimensions, column names and types.
str(titanic)
## 'data.frame': 1310 obs. of 14 variables:
## $ pclass : int 1 1 1 1 1 1 1 1 1 1 ...
## $ survived : int 1 1 0 0 0 1 1 0 1 0 ...
## $ name : chr "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Creighton" ...
## $ sex : chr "female" "male" "female" "male" ...
## $ age : num 29 0.917 2 30 25 ...
## $ sibsp : int 0 1 1 1 1 0 1 0 2 0 ...
## $ parch : int 0 2 2 2 2 0 0 0 0 0 ...
## $ ticket : chr "24160" "113781" "113781" "113781" ...
## $ fare : num 211 152 152 152 152 ...
## $ cabin : chr "B5" "C22 C26" "C22 C26" "C22 C26" ...
## $ embarked : chr "S" "S" "S" "S" ...
## $ boat : chr "2" "11" "" "" ...
## $ body : int NA NA NA 135 NA NA NA NA NA 22 ...
## $ home.dest: chr "St Louis, MO" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" ...
# Preview the first rows of the raw data frame.
head(titanic)
## pclass survived name sex
## 1 1 1 Allen, Miss. Elisabeth Walton female
## 2 1 1 Allison, Master. Hudson Trevor male
## 3 1 0 Allison, Miss. Helen Loraine female
## 4 1 0 Allison, Mr. Hudson Joshua Creighton male
## 5 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female
## 6 1 1 Anderson, Mr. Harry male
## age sibsp parch ticket fare cabin embarked boat body
## 1 29.0000 0 0 24160 211.3375 B5 S 2 NA
## 2 0.9167 1 2 113781 151.5500 C22 C26 S 11 NA
## 3 2.0000 1 2 113781 151.5500 C22 C26 S NA
## 4 30.0000 1 2 113781 151.5500 C22 C26 S 135
## 5 25.0000 1 2 113781 151.5500 C22 C26 S NA
## 6 48.0000 0 0 19952 26.5500 E12 S 3 NA
## home.dest
## 1 St Louis, MO
## 2 Montreal, PQ / Chesterville, ON
## 3 Montreal, PQ / Chesterville, ON
## 4 Montreal, PQ / Chesterville, ON
## 5 Montreal, PQ / Chesterville, ON
## 6 New York, NY
# Crear Árbol de Decisiones
# Keep only the predictors (class, age, sex) and the response (survived).
titanic <- titanic[c("pclass", "age", "sex", "survived")]
# Treat the response and the passenger class as categorical variables.
titanic[c("survived", "pclass")] <- lapply(
  titanic[c("survived", "pclass")],
  as.factor
)
# Check the resulting column types.
str(titanic)
## 'data.frame': 1310 obs. of 4 variables:
## $ pclass : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ age : num 29 0.917 2 30 25 ...
## $ sex : chr "female" "male" "female" "male" ...
## $ survived: Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 2 1 ...
# Fit a decision tree that predicts `survived` from every other column kept
# in `titanic` (pclass, age and sex). `survived` is a factor, so rpart builds
# a classification tree.
arbol_titanic <- rpart(survived ~ ., data = titanic)
# Draw the tree with rpart.plot's default node labels.
rpart.plot(arbol_titanic)

# Draw the tree again with custom node labels. Per the rpart.plot
# documentation, extra = 7 shows only the probability of the second class
# (here level "1") without the fitted class. The prefix must end in the
# newline escape "\n" (the original had "/n", which was printed literally)
# so that the caption sits on its own line above the value.
prp(arbol_titanic, extra = 7, prefix = "fracción\n")

# Conclusiones
# 0 y 1 representan no y sí: en la columna survived, 0 = no sobrevivió y 1 = sobrevivió.
# 0.38 es la proporción de gente que sobrevivió; asimismo, 0.73 es la proporción (73%) de mujeres que sobrevivieron.
# El 100% es la cantidad de gente que entra en la muestra (sobrevivieron); asimismo, 64% son hombres, 3% son hombres menores de 9.5 años, etc.
# En conclusión, las probabilidades más altas de sobrevivir en el naufragio del Titanic son las de los hombres menores de 9.5 años que no están en la tercera clase. El segundo caso es ser mujer (73%).
LS0tCnRpdGxlOiAiVGl0YW5pYyAoQXJib2wgZGUgRGVjaXNpb25lcykiCmF1dGhvcjogIlBhYmxvIFNhbmNobyBBMDE3MjIyMzYiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDogCiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IGRhcmtseQpkYXRlOiAiMjAyNS0wOC0yMCIKLS0tCgohW10oaHR0cHM6Ly9tLm1lZGlhLWFtYXpvbi5jb20vaW1hZ2VzL00vTVY1Qll6WXlOMkZpWm1VdFlXWXpNeTAwTXpWaUxXSmtaVE10T0dZMVpqZ3pOV013TjJZeFhrRXlYa0ZxY0djQC5fVjFfLmpwZykKCmBgYHtyfQojIEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMKI2luc3RhbGwucGFja2FnZXMoInJwYXJ0IikKI2luc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQoKbGlicmFyeShycGFydCkKbGlicmFyeShycGFydC5wbG90KQoKIyBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zCnRpdGFuaWMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9wYWJsb3NhbmNoby9EZXNrdG9wL0NvbmNlbnRyYWNpb8yBbi9Nb2R1bG8gMiBDb25jZW50cmFjaW9uIChSKS90aXRhbmljLmNzdiIpCgojIyBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zCnN1bW1hcnkodGl0YW5pYykKc3RyKHRpdGFuaWMpCmhlYWQodGl0YW5pYykKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IENyZWFyIEFyYm9sIGRlIERlY2lzaW9uZXMgPC9zcGFuPgpgYGB7cn0KdGl0YW5pYyA8LSB0aXRhbmljWyxjKCJwY2xhc3MiLCAiYWdlIiwgInNleCIsICJzdXJ2aXZlZCIpXQp0aXRhbmljJHN1cnZpdmVkIDwtIGFzLmZhY3Rvcih0aXRhbmljJHN1cnZpdmVkKQp0aXRhbmljJHBjbGFzcyA8LSBhcy5mYWN0b3IodGl0YW5pYyRwY2xhc3MpCnN0cih0aXRhbmljKQphcmJvbF90aXRhbmljIDwtIHJwYXJ0KHN1cnZpdmVkfi4sIGRhdGE9dGl0YW5pYykKcnBhcnQucGxvdChhcmJvbF90aXRhbmljKQpwcnAoYXJib2xfdGl0YW5pYywgZXh0cmE9NywgcHJlZml4PSJmcmFjY2nDs24vbiIpCmBgYAoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gQ29uY2x1c2lvbmVzIDwvc3Bhbj4KYGBge3J9CiMgMCB5IDEgcmVwcmVzZW50YW4gc2kgeSBuby4gKG5vIGltcG9ydGEgY3VhbCBlcyBjdWFsKQojIDAuMzggZXMgbGEgcHJvcG9yY2lvbiBkZSBnZW50ZSBxdWUgc29icmV2aXZpbywgYXNpbWlzbW8gMC43MyBlcyBlbCBwb3JjZW50YWplIGRlIG11amVyZXMgcXVlIHNvYnJldml2aWVyb24KIyBlbCAxMDAlIGVzIGxhIGNhbnRpZGFkIGRlIGxhIGdlbnRlIHF1ZSBlbnRyYSBlbiBsYSBzYW1wbGUgKHNvYnJldml2aWVyb24pLCBhc2ltaXNtbyA2NCUgc29uIGhvbWJyZXMsIDMlIHNvbiBob21icmVzIG1lbm9yZXMgZGUgOS41IGV0Yy4KCiNFbiBjb25jbHVzaW9uLCBsYXMgcHJvYmFiaWxpZGFkZXMgbWFzIGFsdGFzIGRlIHNvYnJldml2aXIgZW4gZWwgbmF1ZnJhZ2lvIGRlbCBUaXRhbmljIHNvbiBkZSBsb3MgaG9t
YnJlcyBtZW5vcmVzIGRlIDkuNSBhw7FvcywgcXVlIG5vIGVzdGFuIGVuIGxhIHRlcmNlcmEgY2xhc2UuIEVsIHNlZ3VuZG8gZXMgc2VyIG11amVyICg3MyUpLgpgYGAKCgo=