## Cargar librerías

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Versión 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## Escriba 'rattle()' para agitar, sacudir y  rotar sus datos.

Cargar archivo .csv del Titanic

library(readr)
# Read the Titanic passenger data from CSV into a tibble; column types are
# guessed by readr.
# NOTE(review): the path is absolute and specific to one user's machine --
# confirm where the file should live before sharing this script.
titanic3 <- readr::read_csv(
  file = "C:/Users/Nicolas/Downloads/titanic3.csv"
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   pclass = col_character(),
##   survived = col_double(),
##   name = col_character(),
##   sex = col_character(),
##   age = col_double(),
##   sibsp = col_double(),
##   parch = col_double(),
##   ticket = col_character(),
##   fare = col_double(),
##   cabin = col_character(),
##   embarked = col_character(),
##   boat = col_character(),
##   body = col_double(),
##   home.dest = col_character()
## )
# Open the spreadsheet-style data viewer only when a user is at the console.
# View() needs a GUI/display, so calling it unconditionally can fail (or is
# simply useless) when the script is knitted or run with Rscript.
if (interactive()) {
  View(titanic3)
}

Variable target (sobrevida) debe ser de tipo FACTOR.

# Recode the target: a value of 1 becomes "Si", any other non-missing value
# becomes "No", and the result is stored as a factor. The remaining
# categorical predictors are converted to factors in the same step.
titanic3 <- dplyr::mutate(
  titanic3,
  survived = as.factor(ifelse(survived == 1, "Si", "No")),
  dplyr::across(c(pclass, sibsp, parch, sex, embarked), as.factor)
)

# Preview the first rows to check the converted column types.
head(titanic3)
## # A tibble: 6 x 14
##   pclass survived name       sex     age sibsp parch ticket  fare cabin embarked
##   <fct>  <fct>    <chr>      <fct> <dbl> <fct> <fct> <chr>  <dbl> <chr> <fct>   
## 1 1st    Si       Allen, Mi~ fema~ 29    0     0     24160  211.  B5    S       
## 2 1st    Si       Allison, ~ male   0.92 1     2     113781 152.  C22 ~ S       
## 3 1st    No       Allison, ~ fema~  2    1     2     113781 152.  C22 ~ S       
## 4 1st    No       Allison, ~ male  30    1     2     113781 152.  C22 ~ S       
## 5 1st    No       Allison, ~ fema~ 25    1     2     113781 152.  C22 ~ S       
## 6 1st    Si       Anderson,~ male  48    0     0     19952   26.6 E12   S       
## # ... with 3 more variables: boat <chr>, body <dbl>, home.dest <chr>

Árbol 1: Comparar la variable target con el resto de las variables.

# Tree 1: fit a classification tree (method = "class") of `survived` on
# sex, age, pclass, sibsp, parch and embarked.
arbol1 <- rpart(
  survived ~ sex + age + pclass + sibsp + parch + embarked,
  data = titanic3,
  method = "class"
)

# Plot the fitted tree.
fancyRpartPlot(arbol1)

Árbol 2: Evaluar la variable target frente a la variable de mayor incidencia.

# Tree 2: classification tree of `survived` on a single predictor, the
# logical indicator `age >= 9.5`.
# NOTE(review): the previous comment mentioned "edad=24", which does not match
# the 9.5 threshold used in the formula -- confirm the intended cut-off.
arbol2 <- rpart(
  survived ~ age >= 9.5,
  data = titanic3,
  method = "class"
)

# Plot the fitted tree.
fancyRpartPlot(arbol2)