Importación base de datos

# Load the Titanic passenger data from the author's local CSV file.
titanic <- read.csv(file = "~/UNIVERSIDAD/CONCENTRACIÓN/titanic.csv")

Exploración BD (Entender la base de datos)

# Per-column summary of the raw data; the recorded output below shows
# quartiles for the numeric columns and NA counts for age, fare and body.
summary(titanic)
##      pclass         survived         name               sex           
##  Min.   :1.000   Min.   :0.000   Length:1309        Length:1309       
##  1st Qu.:2.000   1st Qu.:0.000   Class :character   Class :character  
##  Median :3.000   Median :0.000   Mode  :character   Mode  :character  
##  Mean   :2.295   Mean   :0.382                                        
##  3rd Qu.:3.000   3rd Qu.:1.000                                        
##  Max.   :3.000   Max.   :1.000                                        
##                                                                       
##       age              sibsp            parch          ticket         
##  Min.   : 0.1667   Min.   :0.0000   Min.   :0.000   Length:1309       
##  1st Qu.:21.0000   1st Qu.:0.0000   1st Qu.:0.000   Class :character  
##  Median :28.0000   Median :0.0000   Median :0.000   Mode  :character  
##  Mean   :29.8811   Mean   :0.4989   Mean   :0.385                     
##  3rd Qu.:39.0000   3rd Qu.:1.0000   3rd Qu.:0.000                     
##  Max.   :80.0000   Max.   :8.0000   Max.   :9.000                     
##  NA's   :263                                                          
##       fare            cabin             embarked             boat          
##  Min.   :  0.000   Length:1309        Length:1309        Length:1309       
##  1st Qu.:  7.896   Class :character   Class :character   Class :character  
##  Median : 14.454   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 33.295                                                           
##  3rd Qu.: 31.275                                                           
##  Max.   :512.329                                                           
##  NA's   :1                                                                 
##       body        home.dest        
##  Min.   :  1.0   Length:1309       
##  1st Qu.: 72.0   Class :character  
##  Median :155.0   Mode  :character  
##  Mean   :160.8                     
##  3rd Qu.:256.0                     
##  Max.   :328.0                     
##  NA's   :1188
# Inspect the data frame structure: dimensions, column types and the first
# few values of every column.
str(titanic)
## 'data.frame':    1309 obs. of  14 variables:
##  $ pclass   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ survived : int  1 1 0 0 0 1 1 0 1 0 ...
##  $ name     : chr  "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Creighton" ...
##  $ sex      : chr  "female" "male" "female" "male" ...
##  $ age      : num  29 0.917 2 30 25 ...
##  $ sibsp    : int  0 1 1 1 1 0 1 0 2 0 ...
##  $ parch    : int  0 2 2 2 2 0 0 0 0 0 ...
##  $ ticket   : chr  "24160" "113781" "113781" "113781" ...
##  $ fare     : num  211 152 152 152 152 ...
##  $ cabin    : chr  "B5" "C22 C26" "C22 C26" "C22 C26" ...
##  $ embarked : chr  "S" "S" "S" "S" ...
##  $ boat     : chr  "2" "11" "" "" ...
##  $ body     : int  NA NA NA 135 NA NA NA NA NA 22 ...
##  $ home.dest: chr  "St Louis, MO" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" ...

Árbol de Decisión

Solamente se seleccionan las columnas (variables) que se utilizarán para la clasificación

# Keep only the predictors (class, age, sex) and the response (survived).
Titanic_arbol <- titanic[, c("pclass", "age", "sex", "survived")]

# Recode the 0/1 response into labelled classes and turn the categorical
# predictors into factors so the tree is fitted as a classification model.
Titanic_arbol <- transform(
  Titanic_arbol,
  survived = as.factor(ifelse(survived == 0, "Murio", "Sobrevive")),
  pclass = as.factor(pclass),
  sex = as.factor(sex)
)

# Count the missing cells before discarding the incomplete rows.
sum(is.na(Titanic_arbol))
## [1] 263
Titanic_arbol <- na.omit(Titanic_arbol)

Creación Árbol de Decisión

# Fit a decision tree for survival using every remaining column as predictor,
# then print the resulting splits.
arbol <- rpart(
  formula = survived ~ .,
  data = Titanic_arbol
)
print(arbol)
## n= 1046 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 1046 427 Murio (0.59177820 0.40822180)  
##    2) sex=male 658 135 Murio (0.79483283 0.20516717)  
##      4) age>=9.5 615 110 Murio (0.82113821 0.17886179) *
##      5) age< 9.5 43  18 Sobrevive (0.41860465 0.58139535)  
##       10) pclass=3 29  11 Murio (0.62068966 0.37931034) *
##       11) pclass=1,2 14   0 Sobrevive (0.00000000 1.00000000) *
##    3) sex=female 388  96 Sobrevive (0.24742268 0.75257732)  
##      6) pclass=3 152  72 Murio (0.52631579 0.47368421)  
##       12) age>=1.5 145  66 Murio (0.54482759 0.45517241) *
##       13) age< 1.5 7   1 Sobrevive (0.14285714 0.85714286) *
##      7) pclass=1,2 236  16 Sobrevive (0.06779661 0.93220339) *
## First visualization: rpart.plot with its default settings
rpart.plot(arbol)

## Second visualization: prp with extra = 7 and the text "Fraction"
## prepended to the node labels
prp(arbol, extra = 7, prefix = "Fraction")

Interpretación de resultados

  1. La probabilidad más alta de sobrevivir en el Titanic (100%) corresponde a los niños varones menores de 9.5 años de primera o segunda clase.
  2. La probabilidad más baja de sobrevivir en el Titanic corresponde a los hombres de 9.5 años o más, con un 18%, seguida por los niños varones menores de 9.5 años de tercera clase, con un 38%.
LS0tDQp0aXRsZTogIlJlZGVzIE5ldXJvbmFsZXMgeSBDbHVzdGVycyBDbGFzZSINCmF1dGhvcjogIkFzdHJpZCBQYW9sYSBHb256w6FsZXogRMOtYXogLSBBMDA4MzAxMTQiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogdHJ1ZQ0KICAgIHRvY19mbG9hdDogdHJ1ZQ0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCi0tLQ0KDQoNCiFbXShpbWFnZXMvVGl0YW5pYy1iYXJjby5wbmcpe3dpZHRoPSIzOTQifQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmBgYA0KDQpgYGB7ciwgaW5jbHVkZT1GQUxTRX0NCmxpYnJhcnkoaHJicnRoZW1lcykNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShycGFydCkNCmxpYnJhcnkocnBhcnQucGxvdCkNCmBgYA0KDQojIyBJbXBvcnRhY2nDs24gYmFzZSBkZSBkYXRvcw0KDQpgYGB7cn0NCnRpdGFuaWMgPC1yZWFkLmNzdigifi9VTklWRVJTSURBRC9DT05DRU5UUkFDScOTTi90aXRhbmljLmNzdiIpDQoNCmBgYA0KDQojIyBFeHBsb3JhY2nDs24gQkQgKEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MpDQoNCmBgYHtyfQ0Kc3VtbWFyeSh0aXRhbmljKQ0KYGBgDQoNCmBgYHtyfQ0Kc3RyKHRpdGFuaWMpDQpgYGANCg0KIyDDgXJib2wgZGUgRGVjaXNpw7NuDQoNClNvbGFtZW50ZSBzZSB1dGlsaXphbiBsYXMgY29sdW1uYXMgbyB2YXJpYWJsZXMgcXVlIHNlIHV0aWxpemFyw6FuIHBhcmEgbGEgY2F0ZWdvcml6YWNpw7NuDQoNCmBgYHtyfQ0KVGl0YW5pY19hcmJvbDwtIHRpdGFuaWNbLGMoInBjbGFzcyIsImFnZSIsInNleCIsInN1cnZpdmVkIildDQoNClRpdGFuaWNfYXJib2wkc3Vydml2ZWQ8LSBhcy5mYWN0b3IoaWZlbHNlKFRpdGFuaWNfYXJib2wkc3Vydml2ZWQ9PTAsICJNdXJpbyIsIlNvYnJldml2ZSIpKQ0KDQpUaXRhbmljX2FyYm9sJHBjbGFzczwtYXMuZmFjdG9yKFRpdGFuaWNfYXJib2wkcGNsYXNzKQ0KDQpUaXRhbmljX2FyYm9sJHNleDwtIGFzLmZhY3RvcihUaXRhbmljX2FyYm9sJHNleCkNCg0Kc3VtKGlzLm5hKFRpdGFuaWNfYXJib2wpKQ0KDQpUaXRhbmljX2FyYm9sPC1uYS5vbWl0KFRpdGFuaWNfYXJib2wpDQpgYGANCg0KIyMgQ3JlYWNpw7NuIMOBcmJvbCBkZSBEZWNpc2nDs24NCg0KYGBge3J9DQphcmJvbDwtcnBhcnQoZm9ybXVsYSA9IHN1cnZpdmVkfi4sIGRhdGEgPSBUaXRhbmljX2FyYm9sKQ0KYXJib2wNCg0KIyMgVmlzdWFsaXphY2nDs24gcHJpbWVyIHRpcG8NCnJwYXJ0LnBsb3QoYXJib2wpDQoNCiMjIFZpc3VhbGl6YWNpw7NuIHNlZ3VuZG8gdGlwbw0KcHJwKGFyYm9sLCBleHRyYSA9IDcscHJlZml4ID0gIkZyYWN0aW9uIikNCmBgYA0KDQojIyMgSW50ZXJwcmV0YWNpw7NuIGRlIHJlc3VsdGFkb3MNCg0KMS4gIExhcyBtw6FzIGFsdGFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBlcyBzaWVuZG8gbmnDsW9z
IHZhcm9uZXMgZGUgbWVub3MgZGUgOS41IGHDsW9zIHkgcXVlIHNlYSBkZSBzZWd1bmRhIG8gdGVyY2VyYSBjbGFzZS5cDQoyLiAgTGEgbcOhcyBiYWphIHByb2JhYmlsaWRhZCBkZSBzb2JyZXZpdmlyIGVuIGVsIFRpdGFuaWMgZXMgc2llbmRvIHVuIGhvbWJyZSBtYXlvciBkZSBvIGlndWFsIGEgOS41IGHDsW9zIGNvbiB1biAwLjE4JSB5IGhvbWJyZXMgbWVub3JlcyBvIGlndWFsIGEgOS41IGHDsW9zIGVuIHRlcmNlcmEgY2xhc2UgY29uIHVuIDM4JQ0K