## Load the passenger records from the CSV in the working directory and
## print a per-column summary (the recorded output follows below).
basedatos <- read.csv(file = "titanic.csv", header = TRUE)
summary(object = basedatos)
## ï..pclass survived name sex
## Min. :1.000 Min. :0.000 Length:1310 Length:1310
## 1st Qu.:2.000 1st Qu.:0.000 Class :character Class :character
## Median :3.000 Median :0.000 Mode :character Mode :character
## Mean :2.295 Mean :0.382
## 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :3.000 Max. :1.000
## NA's :1 NA's :1
## age sibsp parch ticket
## Min. : 0.1667 Min. :0.0000 Min. :0.000 Length:1310
## 1st Qu.:21.0000 1st Qu.:0.0000 1st Qu.:0.000 Class :character
## Median :28.0000 Median :0.0000 Median :0.000 Mode :character
## Mean :29.8811 Mean :0.4989 Mean :0.385
## 3rd Qu.:39.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :80.0000 Max. :8.0000 Max. :9.000
## NA's :264 NA's :1 NA's :1
## fare cabin embarked boat
## Min. : 0.000 Length:1310 Length:1310 Length:1310
## 1st Qu.: 7.896 Class :character Class :character Class :character
## Median : 14.454 Mode :character Mode :character Mode :character
## Mean : 33.295
## 3rd Qu.: 31.275
## Max. :512.329
## NA's :2
## body home.dest
## Min. : 1.0 Length:1310
## 1st Qu.: 72.0 Class :character
## Median :155.0 Mode :character
## Mean :160.8
## 3rd Qu.:256.0
## Max. :328.0
## NA's :1189
## Filter: keep only the columns used by the model.
## The passenger-class column name came through garbled as "ï..pclass" in this
## run (presumably a byte-order mark at the start of the CSV -- confirm), and
## that spelling depends on how the file was decoded. Locate the column by its
## suffix instead of hard-coding the garbled name, then give it a clean name.
pclass_col <- grep("pclass$", names(basedatos), value = TRUE)
stopifnot(length(pclass_col) == 1L)
Titanic <- basedatos[, c(pclass_col, "age", "sex", "survived")]
names(Titanic)[1] <- "pclass"
## Convert variable types
Titanic$survived <- as.factor(Titanic$survived)
Titanic$pclass <- as.factor(Titanic$pclass)
## Age is a continuous measurement: keep it numeric so the tree can split on
## thresholds (age < x) rather than on arbitrary subsets of factor levels
## (one level per distinct age), which overfits and yields unreadable labels.
Titanic$age <- as.numeric(Titanic$age)
Titanic$sex <- as.factor(Titanic$sex)
## Remove NAs: first report how many missing cells there are, then drop every
## row that has at least one.
sum(is.na(Titanic))
## [1] 266
Titanic <- na.omit(Titanic)
## Build the decision tree
library(rpart)
## Warning: package 'rpart' was built under R version 4.1.3
## survived is a factor, so fit a classification tree (stated explicitly).
arbol <- rpart(formula = survived ~ ., data = Titanic, method = "class")
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.1.3
## Plot the fitted tree twice: default layout, then prp() with extra = 7 and a
## custom prefix on each node label.
rpart.plot(arbol)
prp(arbol, extra = 7, prefix = "fraccion\n")