## Rows: 1310 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): name, sex, ticket, cabin, embarked, boat, home.dest
## dbl (7): pclass, survived, age, sibsp, parch, fare, body
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## pclass survived name sex
## Min. :1.000 Min. :0.000 Length:1310 Length:1310
## 1st Qu.:2.000 1st Qu.:0.000 Class :character Class :character
## Median :3.000 Median :0.000 Mode :character Mode :character
## Mean :2.295 Mean :0.382
## 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :3.000 Max. :1.000
## NA's :1 NA's :1
## age sibsp parch ticket
## Min. : 0.1667 Min. :0.0000 Min. :0.000 Length:1310
## 1st Qu.:21.0000 1st Qu.:0.0000 1st Qu.:0.000 Class :character
## Median :28.0000 Median :0.0000 Median :0.000 Mode :character
## Mean :29.8811 Mean :0.4989 Mean :0.385
## 3rd Qu.:39.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :80.0000 Max. :8.0000 Max. :9.000
## NA's :264 NA's :1 NA's :1
## fare cabin embarked boat
## Min. : 0.000 Length:1310 Length:1310 Length:1310
## 1st Qu.: 7.896 Class :character Class :character Class :character
## Median : 14.454 Mode :character Mode :character Mode :character
## Mean : 33.295
## 3rd Qu.: 31.275
## Max. :512.329
## NA's :2
## body home.dest
## Min. : 1.0 Length:1310
## 1st Qu.: 72.0 Class :character
## Median :155.0 Mode :character
## Mean :160.8
## 3rd Qu.:256.0
## Max. :328.0
## NA's :1189
#count(titanic, name, sort=TRUE)
#count(titanic, sex, sort=TRUE)
#count(titanic, ticket, sort=TRUE)
#count(titanic, cabin, sort=TRUE)
#count(titanic, embarked, sort=TRUE)
#count(titanic, boat, sort=TRUE)
#count(titanic, home.dest, sort=TRUE)
# Observaciones: 1. Tenemos NA en la base de datos. 2. Un par de nombres están repetidos.
# Rename the passenger-class column from "pclass" to "class".
# Matching on the column name (instead of on position 1) keeps the rename
# correct if the column order ever changes, and makes re-running it a no-op.
names(titanic)[names(titanic) == "pclass"] <- "class"

# Keep only the variables of interest.
# NOTE: `Titanic` shadows the built-in `datasets::Titanic` table for the rest
# of the session.
Titanic <- titanic[, c("class", "age", "sex", "survived")]

# How many missing values (NA) are there in the selected columns in total?
sum(is.na(Titanic))
## [1] 267

# Missing values per column.
# NOTE(review): the recorded output below had no generating call in the
# script; colSums() is consistent with it (1 + 264 + 1 + 1 = 267) -- confirm.
colSums(is.na(Titanic))
##    class      age      sex survived
##        1      264        1        1

# Drop every row that contains at least one NA.
Titanic <- na.omit(Titanic)

# Convert the categorical variables to factors.
Titanic$class <- as.factor(Titanic$class)
Titanic$sex <- as.factor(Titanic$sex)
Titanic$survived <- as.factor(Titanic$survived)