Load libraries
library(missForest)
library(mice)
Make Missing Values
# Baseline: per-column summary of the untouched iris data, rendered as a table.
knitr::kable(x = summary(iris))
|   | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|:--|:-------------|:------------|:-------------|:------------|:--------|
|   | Min. :4.300 | Min. :2.000 | Min. :1.000 | Min. :0.100 | setosa :50 |
|   | 1st Qu.:5.100 | 1st Qu.:2.800 | 1st Qu.:1.600 | 1st Qu.:0.300 | versicolor:50 |
|   | Median :5.800 | Median :3.000 | Median :4.350 | Median :1.300 | virginica :50 |
|   | Mean :5.843 | Mean :3.057 | Mean :3.758 | Mean :1.199 | NA |
|   | 3rd Qu.:6.400 | 3rd Qu.:3.300 | 3rd Qu.:5.100 | 3rd Qu.:1.800 | NA |
|   | Max. :7.900 | Max. :4.400 | Max. :6.900 | Max. :2.500 | NA |
# Blank out 10% of the cells of iris completely at random.
# prodNA() picks the cells with the global RNG, so the seed is fixed first;
# otherwise every run yields a different missingness pattern and none of the
# downstream tables can be reproduced. The value matches the seed given to
# mice() in the imputation step.
set.seed(500)
iris.mis <- prodNA(iris, noNA = 0.1)

# Per-column summary of the degraded data; each column now reports its NA count.
knitr::kable(summary(iris.mis))
|   | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|:--|:-------------|:------------|:-------------|:------------|:--------|
|   | Min. :4.300 | Min. :2.200 | Min. :1.000 | Min. :0.100 | setosa :43 |
|   | 1st Qu.:5.100 | 1st Qu.:2.800 | 1st Qu.:1.575 | 1st Qu.:0.300 | versicolor:48 |
|   | Median :5.700 | Median :3.000 | Median :4.300 | Median :1.300 | virginica :45 |
|   | Mean :5.821 | Mean :3.063 | Mean :3.746 | Mean :1.197 | NA's :14 |
|   | 3rd Qu.:6.400 | 3rd Qu.:3.350 | 3rd Qu.:5.100 | 3rd Qu.:1.800 | NA |
|   | Max. :7.900 | Max. :4.400 | Max. :6.900 | Max. :2.500 | NA |
|   | NA's :14 | NA's :15 | NA's :14 | NA's :18 | NA |
Treating continuous variables
# Keep only the continuous measurements: every column except Species.
iris.mis <- iris.mis[, setdiff(names(iris.mis), "Species"), drop = FALSE]

# Tabulate which combinations of columns are missing together.
md.pattern(iris.mis)
Sepal.Length Petal.Length Sepal.Width Petal.Width
93 1 1 1 1 0
13 0 1 1 1 1
13 1 1 0 1 1
11 1 0 1 1 1
16 1 1 1 0 1
1 0 0 1 1 2
1 1 0 0 1 2
1 1 1 0 0 2
1 1 0 1 0 2
14 14 15 18 61
Impute missing values
- pmm (Predictive Mean Matching) – for numeric variables
- logreg (Logistic Regression) – for binary variables (factors with 2 levels)
- polyreg (Polytomous Logistic Regression) – for unordered factor variables (> 2 levels)
- polr (Proportional Odds Model) – for ordered factor variables (> 2 levels)
# Impute the missing measurements with predictive mean matching.
# m = 1 asks for a single imputed data set; the seed makes the run repeatable.
imputed_Data <- mice(
  iris.mis,
  m = 1,
  maxit = 50,
  method = "pmm",
  seed = 500
)

# Overview of the imputation object.
summary(imputed_Data)

# Extract the filled-in data set and show its first rows.
completeData <- complete(imputed_Data)
knitr::kable(head(completeData))
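| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width |
|-------------:|------------:|-------------:|------------:|
| 5.1 | 3.5 | 1.4 | 0.2 |
| 4.9 | 3.0 | 1.4 | 0.2 |
| 4.7 | 3.0 | 1.3 | 0.2 |
| 4.6 | 3.1 | 1.5 | 0.2 |
| 5.0 | 3.6 | 1.4 | 0.2 |
| 5.4 | 3.9 | 1.7 | 0.4 |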
# For comparison: the same leading rows before imputation, NAs still in place.
knitr::kable(x = head(iris.mis, n = 6L))
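| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width |
|-------------:|------------:|-------------:|------------:|
| 5.1 | 3.5 | 1.4 | 0.2 |
| 4.9 | 3.0 | 1.4 | NA |
| 4.7 | NA | 1.3 | 0.2 |
| 4.6 | NA | 1.5 | NA |
| 5.0 | 3.6 | 1.4 | NA |
| 5.4 | 3.9 | 1.7 | 0.4 |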
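
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|-------------:|------------:|-------------:|------------:|:--------|
| 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| 5.4 | 3.9 | 1.7 | 0.4 | setosa |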