Importar la base de datos

titanic <- read.csv("/Users/lightedit/Documents/TEC SEMESTRE 6.1/M2/R/titanic/titanic.csv")

Entender la base de datos

summary(titanic)
##     Survived          Pclass          Name               Sex           
##  Min.   :0.0000   Min.   :1.000   Length:887         Length:887        
##  1st Qu.:0.0000   1st Qu.:2.000   Class :character   Class :character  
##  Median :0.0000   Median :3.000   Mode  :character   Mode  :character  
##  Mean   :0.3856   Mean   :2.306                                        
##  3rd Qu.:1.0000   3rd Qu.:3.000                                        
##  Max.   :1.0000   Max.   :3.000                                        
##       Age        Siblings.Spouses.Aboard Parents.Children.Aboard
##  Min.   : 0.42   Min.   :0.0000          Min.   :0.0000         
##  1st Qu.:20.25   1st Qu.:0.0000          1st Qu.:0.0000         
##  Median :28.00   Median :0.0000          Median :0.0000         
##  Mean   :29.47   Mean   :0.5254          Mean   :0.3833         
##  3rd Qu.:38.00   3rd Qu.:1.0000          3rd Qu.:0.0000         
##  Max.   :80.00   Max.   :8.0000          Max.   :6.0000         
##       Fare        
##  Min.   :  0.000  
##  1st Qu.:  7.925  
##  Median : 14.454  
##  Mean   : 32.305  
##  3rd Qu.: 31.137  
##  Max.   :512.329
str(titanic)
## 'data.frame':    887 obs. of  8 variables:
##  $ Survived               : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass                 : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name                   : chr  "Mr. Owen Harris Braund" "Mrs. John Bradley (Florence Briggs Thayer) Cumings" "Miss. Laina Heikkinen" "Mrs. Jacques Heath (Lily May Peel) Futrelle" ...
##  $ Sex                    : chr  "male" "female" "female" "female" ...
##  $ Age                    : num  22 38 26 35 35 27 54 2 27 14 ...
##  $ Siblings.Spouses.Aboard: int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parents.Children.Aboard: int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare                   : num  7.25 71.28 7.92 53.1 8.05 ...

Filtrar base de datos

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.5.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Titanic <- titanic[,c("pclass","age","sex","survived")]
Titanic <- titanic %>% select(Pclass,Age, Sex, Survived)
Titanic$Survived <- as.factor(ifelse(Titanic$Survived==0, "Murio", "Sobrevive"))
Titanic$Pclass <- as.factor(Titanic$Pclass)
Titanic$Sex <-  as.factor(Titanic$Sex)
str(Titanic)
## 'data.frame':    887 obs. of  4 variables:
##  $ Pclass  : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
##  $ Age     : num  22 38 26 35 35 27 54 2 27 14 ...
##  $ Sex     : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ Survived: Factor w/ 2 levels "Murio","Sobrevive": 1 2 2 2 1 1 1 1 2 2 ...
sum(is.na(Titanic))
## [1] 0
sapply(Titanic, function(x) sum(is.na(x)))
##   Pclass      Age      Sex Survived 
##        0        0        0        0
Titanic <- na.omit(Titanic)

Crear arbol de decisión

# install.packages("rpart")
library(rpart)
arbol <- rpart(formula=Survived ~ ., data = Titanic)
arbol
## n= 887 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 887 342 Murio (0.61443067 0.38556933)  
##    2) Sex=male 573 109 Murio (0.80977312 0.19022688)  
##      4) Age>=13 532  86 Murio (0.83834586 0.16165414) *
##      5) Age< 13 41  18 Sobrevive (0.43902439 0.56097561)  
##       10) Pclass=3 29  11 Murio (0.62068966 0.37931034) *
##       11) Pclass=1,2 12   0 Sobrevive (0.00000000 1.00000000) *
##    3) Sex=female 314  81 Sobrevive (0.25796178 0.74203822)  
##      6) Pclass=3 144  72 Murio (0.50000000 0.50000000)  
##       12) Age>=38.5 15   1 Murio (0.93333333 0.06666667) *
##       13) Age< 38.5 129  58 Sobrevive (0.44961240 0.55038760)  
##         26) Age< 21.5 71  33 Murio (0.53521127 0.46478873)  
##           52) Age>=5.5 54  21 Murio (0.61111111 0.38888889) *
##           53) Age< 5.5 17   5 Sobrevive (0.29411765 0.70588235) *
##         27) Age>=21.5 58  20 Sobrevive (0.34482759 0.65517241) *
##      7) Pclass=1,2 170   9 Sobrevive (0.05294118 0.94705882) *
# install.packages("rpart.plot")
library(rpart.plot)
rpart.plot(arbol)

prp(arbol,extra = 7,prefix = "fraccion")

Conclusiones

  1. Las más altas probabilidades de sobrevivir en el Titanic son niño varón menor de 9.5 años de 1° y 2° clase (100%), y mujeres en 1° y 2° clase (93%).
  2. Las más bajas probabilidades de sobrevivir en el Titanic son los hombres mayores de 9.5 años (18%), y los hombres menores de 9.5 años en 3° clase (38%)
LS0tCnRpdGxlOiAiVGl0YW5pYyIKYXV0aG9yOiAiR2lsYmVydG8gTWVuY2hhY2EiCmRhdGU6ICIyMDIzLTAyLTE0IgpvdXRwdXQ6IAogaHRtbF9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlIAotLS0KCiFbXSgvVXNlcnMvbGlnaHRlZGl0L0RvY3VtZW50cy9URUMgU0VNRVNUUkUgNi4xL00yL1IvdGl0YW5pYy9STVNfVGl0YW5pY18zLmpwZykKCiMjIEltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQp0aXRhbmljIDwtIHJlYWQuY3N2KCIvVXNlcnMvbGlnaHRlZGl0L0RvY3VtZW50cy9URUMgU0VNRVNUUkUgNi4xL00yL1IvdGl0YW5pYy90aXRhbmljLmNzdiIpCmBgYAoKIyMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CnN1bW1hcnkodGl0YW5pYykKc3RyKHRpdGFuaWMpCmBgYAoKIyMgRmlsdHJhciBiYXNlIGRlIGRhdG9zCgpgYGB7cn0KbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5dmVyc2UpCiNUaXRhbmljIDwtIHRpdGFuaWNbLGMoInBjbGFzcyIsImFnZSIsInNleCIsInN1cnZpdmVkIildClRpdGFuaWMgPC0gdGl0YW5pYyAlPiUgc2VsZWN0KFBjbGFzcyxBZ2UsIFNleCwgU3Vydml2ZWQpClRpdGFuaWMkU3Vydml2ZWQgPC0gYXMuZmFjdG9yKGlmZWxzZShUaXRhbmljJFN1cnZpdmVkPT0wLCAiTXVyaW8iLCAiU29icmV2aXZlIikpClRpdGFuaWMkUGNsYXNzIDwtIGFzLmZhY3RvcihUaXRhbmljJFBjbGFzcykKVGl0YW5pYyRTZXggPC0gIGFzLmZhY3RvcihUaXRhbmljJFNleCkKc3RyKFRpdGFuaWMpCgpzdW0oaXMubmEoVGl0YW5pYykpCnNhcHBseShUaXRhbmljLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKQoKVGl0YW5pYyA8LSBuYS5vbWl0KFRpdGFuaWMpCgpgYGAKCiMjIENyZWFyIGFyYm9sIGRlIGRlY2lzacOzbgoKYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygicnBhcnQiKQpsaWJyYXJ5KHJwYXJ0KQphcmJvbCA8LSBycGFydChmb3JtdWxhPVN1cnZpdmVkIH4gLiwgZGF0YSA9IFRpdGFuaWMpCmFyYm9sCgojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCnJwYXJ0LnBsb3QoYXJib2wpCnBycChhcmJvbCxleHRyYSA9IDcscHJlZml4ID0gImZyYWNjaW9uIikKYGBgCgojIyBDb25jbHVzaW9uZXMKCjEuIExhcyBtw6FzIGFsdGFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbmnDsW8gdmFyw7NuIG1lbm9yIGRlIDkuNSBhw7FvcyBkZSAxwrAgeSAywrAgY2xhc2UgKDEwMCUpLCB5IG11amVyZXMgZW4gMcKwIHkgMsKwIGNsYXNlICg5MyUpLiAgCjIuIExhcyBtw6FzIGJhamFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbG9zIGhvbWJyZXMgbWF5b3JlcyBkZSA5LjUgYcOxb3MgKDE4JSksIHkgbG9zIGhvbWJyZXMgbWVub3JlcyBkZSA5LjUgYcOxb3MgZW4gM8KwIGNsYXNlICgzOCUpCgoK