# Imputation of missing values (casos perdidos) with missForest
library(palmerpenguins)
## Warning: package 'palmerpenguins' was built under R version 4.1.3
library(missForest)
## Warning: package 'missForest' was built under R version 4.1.3
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.1.3
## randomForest 4.7-1
## Type rfNews() to see new features/changes/bug fixes.
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.1.3
## Loading required package: itertools
## Warning: package 'itertools' was built under R version 4.1.3
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 4.1.3
# Load the data.
# Name the dataset explicitly: data() called with only `package` just lists
# the datasets the package provides and does not load anything.
data("penguins", package = "palmerpenguins")
# Quick look at the first rows.
head(penguins)
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex
## <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 Adelie Torge~ 39.1 18.7 181 3750 male
## 2 Adelie Torge~ 39.5 17.4 186 3800 fema~
## 3 Adelie Torge~ 40.3 18 195 3250 fema~
## 4 Adelie Torge~ NA NA NA NA <NA>
## 5 Adelie Torge~ 36.7 19.3 193 3450 fema~
## 6 Adelie Torge~ 39.3 20.6 190 3650 male
## # ... with 1 more variable: year <int>
# Convert the tibble to a plain data.frame before passing it to missForest().
df <- as.data.frame(penguins)
# View() opens a viewer window, so only call it in an interactive session;
# this keeps the script runnable under Rscript / knitr.
if (interactive()) {
  View(df)
}
# Class of every column (factor / numeric / integer).
sapply(df, class)
## species island bill_length_mm bill_depth_mm
## "factor" "factor" "numeric" "numeric"
## flipper_length_mm body_mass_g sex year
## "integer" "integer" "factor" "integer"
# If column types had to be changed first, something along the lines of:
#   Inv <- lapply(Inv, as.ordered)
# Run the imputation. With variablewise = FALSE the out-of-bag error is
# reported aggregated by variable type (NRMSE for numeric columns, PFC for
# factors).
# The fit is random, so fix the RNG seed to make the run reproducible.
set.seed(123)
imp <- missForest(df, verbose = TRUE, variablewise = FALSE)
## missForest iteration 1 in progress...done!
## estimated error(s): 0.08166315 0.02602603
## difference(s): 0.0001325624 0.007751938
## time: 0.27 seconds
##
## missForest iteration 2 in progress...done!
## estimated error(s): 0.07774253 0.03103103
## difference(s): 2.302424e-06 0
## time: 0.22 seconds
##
## missForest iteration 3 in progress...done!
## estimated error(s): 0.07832136 0.02802803
## difference(s): 1.594966e-06 0
## time: 0.25 seconds
##
## missForest iteration 4 in progress...done!
## estimated error(s): 0.07986325 0.02802803
## difference(s): 2.453078e-07 0
## time: 0.25 seconds
##
## missForest iteration 5 in progress...done!
## estimated error(s): 0.0793592 0.03303303
## difference(s): 5.903899e-07 0
## time: 0.25 seconds
# Out-of-bag imputation error of the fit above (one value per variable type).
imp[["OOBerror"]]
## NRMSE PFC
## 0.07986325 0.02802803
# Refit with variablewise = TRUE to get one out-of-bag error per column.
# Note: this overwrites the previous `imp`.
# Fix the RNG seed so the imputed values are reproducible.
set.seed(123)
imp <- missForest(df, verbose = TRUE, variablewise = TRUE)
## missForest iteration 1 in progress...done!
## estimated error(s): 0 0 5.508574 0.6528305 24.60605 94540.02 0.08708709 0
## difference(s): 0.0001695107 0.007751938
## time: 0.23 seconds
##
## missForest iteration 2 in progress...done!
## estimated error(s): 0 0 5.336754 0.639392 25.57907 85297.44 0.1021021 0
## difference(s): 1.118621e-06 0
## time: 0.24 seconds
##
## missForest iteration 3 in progress...done!
## estimated error(s): 0 0 5.446977 0.6385925 24.34427 87917.32 0.09309309 0
## difference(s): 1.580543e-06 0
## time: 0.21 seconds
# Out-of-bag imputation error per column (PFC for factors, MSE for numerics).
imp[["OOBerror"]]
## PFC PFC MSE MSE MSE MSE
## 0.000000e+00 0.000000e+00 5.336754e+00 6.393920e-01 2.557907e+01 8.529744e+04
## PFC MSE
## 1.021021e-01 0.000000e+00
# Column classes of the original data; `df` itself is not modified by the
# imputation calls above, whose results are stored in `imp`.
sapply(X = df, FUN = class)
## species island bill_length_mm bill_depth_mm
## "factor" "factor" "numeric" "numeric"
## flipper_length_mm body_mass_g sex year
## "integer" "integer" "factor" "integer"
# Imputed dataset returned by missForest.
dflimpio <- as.data.frame(imp$ximp)
# View() is only meaningful in an interactive session.
if (interactive()) {
  View(dflimpio)
}
# Side-by-side comparison of the original and imputed `sex` values next to
# the original body mass.
# Built with data.frame() rather than cbind(): cbind() on factors produces a
# matrix of integer codes, which drops the factor labels and column names.
comparacion <- data.frame(
  body_mass_g = df$body_mass_g,
  sex_original = df$sex,
  sex_imputed = dflimpio$sex
)
if (interactive()) {
  View(comparacion)
}