# Instalar paquetes y llamar librerías
#install.packages("rpart")
#install.packages("rpart.plot")

library(rpart)
library(rpart.plot)

# Load the Titanic passenger data set from the local CSV file.
titanic <- read.csv(
  file = "/Users/pablosancho/Desktop/Concentración/Modulo 2 Concentracion (R)/titanic.csv"
)

## Understand the dataset
# Print per-column summary statistics; the recorded output below also reports
# the NA count of each numeric column (e.g. 264 missing values in `age`).
summary(titanic)
##      pclass         survived         name               sex           
##  Min.   :1.000   Min.   :0.000   Length:1310        Length:1310       
##  1st Qu.:2.000   1st Qu.:0.000   Class :character   Class :character  
##  Median :3.000   Median :0.000   Mode  :character   Mode  :character  
##  Mean   :2.295   Mean   :0.382                                        
##  3rd Qu.:3.000   3rd Qu.:1.000                                        
##  Max.   :3.000   Max.   :1.000                                        
##  NA's   :1       NA's   :1                                            
##       age              sibsp            parch          ticket         
##  Min.   : 0.1667   Min.   :0.0000   Min.   :0.000   Length:1310       
##  1st Qu.:21.0000   1st Qu.:0.0000   1st Qu.:0.000   Class :character  
##  Median :28.0000   Median :0.0000   Median :0.000   Mode  :character  
##  Mean   :29.8811   Mean   :0.4989   Mean   :0.385                     
##  3rd Qu.:39.0000   3rd Qu.:1.0000   3rd Qu.:0.000                     
##  Max.   :80.0000   Max.   :8.0000   Max.   :9.000                     
##  NA's   :264       NA's   :1        NA's   :1                         
##       fare            cabin             embarked             boat          
##  Min.   :  0.000   Length:1310        Length:1310        Length:1310       
##  1st Qu.:  7.896   Class :character   Class :character   Class :character  
##  Median : 14.454   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 33.295                                                           
##  3rd Qu.: 31.275                                                           
##  Max.   :512.329                                                           
##  NA's   :2                                                                 
##       body        home.dest        
##  Min.   :  1.0   Length:1310       
##  1st Qu.: 72.0   Class :character  
##  Median :155.0   Mode  :character  
##  Mean   :160.8                     
##  3rd Qu.:256.0                     
##  Max.   :328.0                     
##  NA's   :1189
# Show the compact structure of the data frame: its dimensions plus the type
# and first values of every column.
str(titanic)
## 'data.frame':    1310 obs. of  14 variables:
##  $ pclass   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ survived : int  1 1 0 0 0 1 1 0 1 0 ...
##  $ name     : chr  "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Creighton" ...
##  $ sex      : chr  "female" "male" "female" "male" ...
##  $ age      : num  29 0.917 2 30 25 ...
##  $ sibsp    : int  0 1 1 1 1 0 1 0 2 0 ...
##  $ parch    : int  0 2 2 2 2 0 0 0 0 0 ...
##  $ ticket   : chr  "24160" "113781" "113781" "113781" ...
##  $ fare     : num  211 152 152 152 152 ...
##  $ cabin    : chr  "B5" "C22 C26" "C22 C26" "C22 C26" ...
##  $ embarked : chr  "S" "S" "S" "S" ...
##  $ boat     : chr  "2" "11" "" "" ...
##  $ body     : int  NA NA NA 135 NA NA NA NA NA 22 ...
##  $ home.dest: chr  "St Louis, MO" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" ...
# Preview the first six rows of the raw data.
head(titanic)
##   pclass survived                                            name    sex
## 1      1        1                   Allen, Miss. Elisabeth Walton female
## 2      1        1                  Allison, Master. Hudson Trevor   male
## 3      1        0                    Allison, Miss. Helen Loraine female
## 4      1        0            Allison, Mr. Hudson Joshua Creighton   male
## 5      1        0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female
## 6      1        1                             Anderson, Mr. Harry   male
##       age sibsp parch ticket     fare   cabin embarked boat body
## 1 29.0000     0     0  24160 211.3375      B5        S    2   NA
## 2  0.9167     1     2 113781 151.5500 C22 C26        S   11   NA
## 3  2.0000     1     2 113781 151.5500 C22 C26        S        NA
## 4 30.0000     1     2 113781 151.5500 C22 C26        S       135
## 5 25.0000     1     2 113781 151.5500 C22 C26        S        NA
## 6 48.0000     0     0  19952  26.5500     E12        S    3   NA
##                         home.dest
## 1                    St Louis, MO
## 2 Montreal, PQ / Chesterville, ON
## 3 Montreal, PQ / Chesterville, ON
## 4 Montreal, PQ / Chesterville, ON
## 5 Montreal, PQ / Chesterville, ON
## 6                    New York, NY

# Crear Árbol de Decisiones

# Keep only the three predictors used by the model plus the response column.
titanic <- titanic[, c("pclass", "age", "sex", "survived")]
# Encode the response and the passenger class as factors so that the tree is
# fitted as a classification tree and `pclass` is treated as categorical
# instead of as a number.
titanic$survived <- as.factor(titanic$survived)
titanic$pclass <- as.factor(titanic$pclass)
str(titanic)
## 'data.frame':    1310 obs. of  4 variables:
##  $ pclass  : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age     : num  29 0.917 2 30 25 ...
##  $ sex     : chr  "female" "male" "female" "male" ...
##  $ survived: Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 2 1 ...
# Fit the decision tree: survival explained by every other remaining column.
# method = "class" is what rpart already selects for a factor response; it is
# spelled out here so the intent does not depend on the column's type.
arbol_titanic <- rpart(survived ~ ., data = titanic, method = "class")
# Default rpart.plot rendering of the fitted tree.
rpart.plot(arbol_titanic)

# Second rendering with prp(). Per the rpart.plot documentation, extra = 7
# shows the probability of the second class (here survived = "1") in each node
# without the fitted class label -- confirm against the installed version.
# The prefix ends with "\n" (newline) so the label sits on its own line above
# the value; the previous "/n" was printed literally in every node.
prp(arbol_titanic, extra = 7, prefix = "fracción\n")

# Conclusiones

# En la variable `survived`, 0 significa que la persona no sobrevivió y 1 que sí sobrevivió.
# 0.38 es la proporción de gente que sobrevivió; asimismo, 0.73 es la proporción de mujeres que sobrevivieron.
# El 100% del nodo raíz es el porcentaje de observaciones de la muestra que cae en ese nodo (todos los pasajeros, no solo los que sobrevivieron); asimismo, 64% son hombres, 3% son hombres menores de 9.5 años, etc.

# En conclusión, las probabilidades más altas de sobrevivir en el naufragio del Titanic son las de los hombres menores de 9.5 años que no están en la tercera clase. El segundo grupo con mayor probabilidad es el de las mujeres (73%).
LS0tCnRpdGxlOiAiVGl0YW5pYyAoQXJib2wgZGUgRGVjaXNpb25lcykiCmF1dGhvcjogIlBhYmxvIFNhbmNobyBBMDE3MjIyMzYiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDogCiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IGRhcmtseQpkYXRlOiAiMjAyNS0wOC0yMCIKLS0tCgohW10oaHR0cHM6Ly9tLm1lZGlhLWFtYXpvbi5jb20vaW1hZ2VzL00vTVY1Qll6WXlOMkZpWm1VdFlXWXpNeTAwTXpWaUxXSmtaVE10T0dZMVpqZ3pOV013TjJZeFhrRXlYa0ZxY0djQC5fVjFfLmpwZykKCmBgYHtyfQojIEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMKI2luc3RhbGwucGFja2FnZXMoInJwYXJ0IikKI2luc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQoKbGlicmFyeShycGFydCkKbGlicmFyeShycGFydC5wbG90KQoKIyBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zCnRpdGFuaWMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9wYWJsb3NhbmNoby9EZXNrdG9wL0NvbmNlbnRyYWNpb8yBbi9Nb2R1bG8gMiBDb25jZW50cmFjaW9uIChSKS90aXRhbmljLmNzdiIpCgojIyBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zCnN1bW1hcnkodGl0YW5pYykKc3RyKHRpdGFuaWMpCmhlYWQodGl0YW5pYykKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IENyZWFyIEFyYm9sIGRlIERlY2lzaW9uZXMgPC9zcGFuPgpgYGB7cn0KdGl0YW5pYyA8LSB0aXRhbmljWyxjKCJwY2xhc3MiLCAiYWdlIiwgInNleCIsICJzdXJ2aXZlZCIpXQp0aXRhbmljJHN1cnZpdmVkIDwtIGFzLmZhY3Rvcih0aXRhbmljJHN1cnZpdmVkKQp0aXRhbmljJHBjbGFzcyA8LSBhcy5mYWN0b3IodGl0YW5pYyRwY2xhc3MpCnN0cih0aXRhbmljKQphcmJvbF90aXRhbmljIDwtIHJwYXJ0KHN1cnZpdmVkfi4sIGRhdGE9dGl0YW5pYykKcnBhcnQucGxvdChhcmJvbF90aXRhbmljKQpwcnAoYXJib2xfdGl0YW5pYywgZXh0cmE9NywgcHJlZml4PSJmcmFjY2nDs24vbiIpCmBgYAoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gQ29uY2x1c2lvbmVzIDwvc3Bhbj4KYGBge3J9CiMgMCB5IDEgcmVwcmVzZW50YW4gc2kgeSBuby4gKG5vIGltcG9ydGEgY3VhbCBlcyBjdWFsKQojIDAuMzggZXMgbGEgcHJvcG9yY2lvbiBkZSBnZW50ZSBxdWUgc29icmV2aXZpbywgYXNpbWlzbW8gMC43MyBlcyBlbCBwb3JjZW50YWplIGRlIG11amVyZXMgcXVlIHNvYnJldml2aWVyb24KIyBlbCAxMDAlIGVzIGxhIGNhbnRpZGFkIGRlIGxhIGdlbnRlIHF1ZSBlbnRyYSBlbiBsYSBzYW1wbGUgKHNvYnJldml2aWVyb24pLCBhc2ltaXNtbyA2NCUgc29uIGhvbWJyZXMsIDMlIHNvbiBob21icmVzIG1lbm9yZXMgZGUgOS41IGV0Yy4KCiNFbiBjb25jbHVzaW9uLCBsYXMgcHJvYmFiaWxpZGFkZXMgbWFzIGFsdGFzIGRlIHNvYnJldml2aXIgZW4gZWwgbmF1ZnJhZ2lvIGRlbCBUaXRhbmljIHNvbiBkZSBsb3MgaG9t
YnJlcyBtZW5vcmVzIGRlIDkuNSBhw7FvcywgcXVlIG5vIGVzdGFuIGVuIGxhIHRlcmNlcmEgY2xhc2UuIEVsIHNlZ3VuZG8gZXMgc2VyIG11amVyICg3MyUpLgpgYGAKCgo=