
Importar la base de datos
titanic <- read.csv("/Users/lightedit/Documents/TEC SEMESTRE 6.1/M2/R/titanic/titanic.csv")
Entender la base de datos
summary(titanic)
## Survived Pclass Name Sex
## Min. :0.0000 Min. :1.000 Length:887 Length:887
## 1st Qu.:0.0000 1st Qu.:2.000 Class :character Class :character
## Median :0.0000 Median :3.000 Mode :character Mode :character
## Mean :0.3856 Mean :2.306
## 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :1.0000 Max. :3.000
## Age Siblings.Spouses.Aboard Parents.Children.Aboard
## Min. : 0.42 Min. :0.0000 Min. :0.0000
## 1st Qu.:20.25 1st Qu.:0.0000 1st Qu.:0.0000
## Median :28.00 Median :0.0000 Median :0.0000
## Mean :29.47 Mean :0.5254 Mean :0.3833
## 3rd Qu.:38.00 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :80.00 Max. :8.0000 Max. :6.0000
## Fare
## Min. : 0.000
## 1st Qu.: 7.925
## Median : 14.454
## Mean : 32.305
## 3rd Qu.: 31.137
## Max. :512.329
str(titanic)
## 'data.frame': 887 obs. of 8 variables:
## $ Survived : int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Name : chr "Mr. Owen Harris Braund" "Mrs. John Bradley (Florence Briggs Thayer) Cumings" "Miss. Laina Heikkinen" "Mrs. Jacques Heath (Lily May Peel) Futrelle" ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 27 54 2 27 14 ...
## $ Siblings.Spouses.Aboard: int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parents.Children.Aboard: int 0 0 0 0 0 0 0 1 2 0 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
Filtrar base de datos
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.5.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Titanic <- titanic[,c("pclass","age","sex","survived")]
Titanic <- titanic %>% select(Pclass,Age, Sex, Survived)
Titanic$Survived <- as.factor(ifelse(Titanic$Survived==0, "Murio", "Sobrevive"))
Titanic$Pclass <- as.factor(Titanic$Pclass)
Titanic$Sex <- as.factor(Titanic$Sex)
str(Titanic)
## 'data.frame': 887 obs. of 4 variables:
## $ Pclass : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
## $ Age : num 22 38 26 35 35 27 54 2 27 14 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ Survived: Factor w/ 2 levels "Murio","Sobrevive": 1 2 2 2 1 1 1 1 2 2 ...
sum(is.na(Titanic))
## [1] 0
sapply(Titanic, function(x) sum(is.na(x)))
## Pclass Age Sex Survived
## 0 0 0 0
Titanic <- na.omit(Titanic)
Crear arbol de decisión
# install.packages("rpart")
library(rpart)
arbol <- rpart(formula=Survived ~ ., data = Titanic)
arbol
## n= 887
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 887 342 Murio (0.61443067 0.38556933)
## 2) Sex=male 573 109 Murio (0.80977312 0.19022688)
## 4) Age>=13 532 86 Murio (0.83834586 0.16165414) *
## 5) Age< 13 41 18 Sobrevive (0.43902439 0.56097561)
## 10) Pclass=3 29 11 Murio (0.62068966 0.37931034) *
## 11) Pclass=1,2 12 0 Sobrevive (0.00000000 1.00000000) *
## 3) Sex=female 314 81 Sobrevive (0.25796178 0.74203822)
## 6) Pclass=3 144 72 Murio (0.50000000 0.50000000)
## 12) Age>=38.5 15 1 Murio (0.93333333 0.06666667) *
## 13) Age< 38.5 129 58 Sobrevive (0.44961240 0.55038760)
## 26) Age< 21.5 71 33 Murio (0.53521127 0.46478873)
## 52) Age>=5.5 54 21 Murio (0.61111111 0.38888889) *
## 53) Age< 5.5 17 5 Sobrevive (0.29411765 0.70588235) *
## 27) Age>=21.5 58 20 Sobrevive (0.34482759 0.65517241) *
## 7) Pclass=1,2 170 9 Sobrevive (0.05294118 0.94705882) *
# install.packages("rpart.plot")
library(rpart.plot)
rpart.plot(arbol)

prp(arbol,extra = 7,prefix = "fraccion")

Conclusiones
- Las más altas probabilidades de sobrevivir en el Titanic son niño
varón menor de 9.5 años de 1° y 2° clase (100%), y mujeres en 1° y 2°
clase (93%).
- Las más bajas probabilidades de sobrevivir en el Titanic son los
hombres mayores de 9.5 años (18%), y los hombres menores de 9.5 años en
3° clase (38%)
LS0tCnRpdGxlOiAiVGl0YW5pYyIKYXV0aG9yOiAiR2lsYmVydG8gTWVuY2hhY2EiCmRhdGU6ICIyMDIzLTAyLTE0IgpvdXRwdXQ6IAogaHRtbF9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlIAotLS0KCiFbXSgvVXNlcnMvbGlnaHRlZGl0L0RvY3VtZW50cy9URUMgU0VNRVNUUkUgNi4xL00yL1IvdGl0YW5pYy9STVNfVGl0YW5pY18zLmpwZykKCiMjIEltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQp0aXRhbmljIDwtIHJlYWQuY3N2KCIvVXNlcnMvbGlnaHRlZGl0L0RvY3VtZW50cy9URUMgU0VNRVNUUkUgNi4xL00yL1IvdGl0YW5pYy90aXRhbmljLmNzdiIpCmBgYAoKIyMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CnN1bW1hcnkodGl0YW5pYykKc3RyKHRpdGFuaWMpCmBgYAoKIyMgRmlsdHJhciBiYXNlIGRlIGRhdG9zCgpgYGB7cn0KbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5dmVyc2UpCiNUaXRhbmljIDwtIHRpdGFuaWNbLGMoInBjbGFzcyIsImFnZSIsInNleCIsInN1cnZpdmVkIildClRpdGFuaWMgPC0gdGl0YW5pYyAlPiUgc2VsZWN0KFBjbGFzcyxBZ2UsIFNleCwgU3Vydml2ZWQpClRpdGFuaWMkU3Vydml2ZWQgPC0gYXMuZmFjdG9yKGlmZWxzZShUaXRhbmljJFN1cnZpdmVkPT0wLCAiTXVyaW8iLCAiU29icmV2aXZlIikpClRpdGFuaWMkUGNsYXNzIDwtIGFzLmZhY3RvcihUaXRhbmljJFBjbGFzcykKVGl0YW5pYyRTZXggPC0gIGFzLmZhY3RvcihUaXRhbmljJFNleCkKc3RyKFRpdGFuaWMpCgpzdW0oaXMubmEoVGl0YW5pYykpCnNhcHBseShUaXRhbmljLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKQoKVGl0YW5pYyA8LSBuYS5vbWl0KFRpdGFuaWMpCgpgYGAKCiMjIENyZWFyIGFyYm9sIGRlIGRlY2lzacOzbgoKYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygicnBhcnQiKQpsaWJyYXJ5KHJwYXJ0KQphcmJvbCA8LSBycGFydChmb3JtdWxhPVN1cnZpdmVkIH4gLiwgZGF0YSA9IFRpdGFuaWMpCmFyYm9sCgojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCnJwYXJ0LnBsb3QoYXJib2wpCnBycChhcmJvbCxleHRyYSA9IDcscHJlZml4ID0gImZyYWNjaW9uIikKYGBgCgojIyBDb25jbHVzaW9uZXMKCjEuIExhcyBtw6FzIGFsdGFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbmnDsW8gdmFyw7NuIG1lbm9yIGRlIDkuNSBhw7FvcyBkZSAxwrAgeSAywrAgY2xhc2UgKDEwMCUpLCB5IG11amVyZXMgZW4gMcKwIHkgMsKwIGNsYXNlICg5MyUpLiAgCjIuIExhcyBtw6FzIGJhamFzIHByb2JhYmlsaWRhZGVzIGRlIHNvYnJldml2aXIgZW4gZWwgVGl0YW5pYyBzb24gbG9zIGhvbWJyZXMgbWF5b3JlcyBkZSA5LjUgYcOxb3MgKDE4JSksIHkgbG9zIGhvbWJyZXMgbWVub3JlcyBkZSA5LjUgYcOxb3MgZW4gM8KwIGNsYXNlICgzOCUpCgoK