1. The UC Irvine Machine Learning Repository contains a data set related to glass identification. The data consist of 214 glass samples labeled as one of seven class categories. There are nine predictors, including the refractive index and percentages of eight elements: Na, Mg, Al, Si, K, Ca, Ba, and Fe. The data can be accessed via: library(mlbench), data(Glass), str(Glass)
# Load the Glass data set from the mlbench package and show its structure.
library(mlbench)
data(Glass)
str(Glass)
## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...

Using visualizations, explore the predictor variables to understand their distributions as well as the relationships between predictors.

library(knitr)
# Render the first ten observations as a formatted table.
kable(head(Glass, 10))
RI Na Mg Al Si K Ca Ba Fe Type
1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0 0.00 1
1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0 0.00 1
1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0 0.00 1
1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0 0.00 1
1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0 0.00 1
1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0 0.26 1
1.51743 13.30 3.60 1.14 73.09 0.58 8.17 0 0.00 1
1.51756 13.15 3.61 1.05 73.24 0.57 8.24 0 0.00 1
1.51918 14.04 3.58 1.37 72.08 0.56 8.30 0 0.00 1
1.51755 13.00 3.60 1.36 72.99 0.57 8.40 0 0.11 1
# Per-column summary: quartiles and mean for each numeric predictor, level counts for Type.
summary(Glass)
##        RI              Na              Mg              Al       
##  Min.   :1.511   Min.   :10.73   Min.   :0.000   Min.   :0.290  
##  1st Qu.:1.517   1st Qu.:12.91   1st Qu.:2.115   1st Qu.:1.190  
##  Median :1.518   Median :13.30   Median :3.480   Median :1.360  
##  Mean   :1.518   Mean   :13.41   Mean   :2.685   Mean   :1.445  
##  3rd Qu.:1.519   3rd Qu.:13.82   3rd Qu.:3.600   3rd Qu.:1.630  
##  Max.   :1.534   Max.   :17.38   Max.   :4.490   Max.   :3.500  
##        Si              K                Ca               Ba       
##  Min.   :69.81   Min.   :0.0000   Min.   : 5.430   Min.   :0.000  
##  1st Qu.:72.28   1st Qu.:0.1225   1st Qu.: 8.240   1st Qu.:0.000  
##  Median :72.79   Median :0.5550   Median : 8.600   Median :0.000  
##  Mean   :72.65   Mean   :0.4971   Mean   : 8.957   Mean   :0.175  
##  3rd Qu.:73.09   3rd Qu.:0.6100   3rd Qu.: 9.172   3rd Qu.:0.000  
##  Max.   :75.41   Max.   :6.2100   Max.   :16.190   Max.   :3.150  
##        Fe          Type  
##  Min.   :0.00000   1:70  
##  1st Qu.:0.00000   2:76  
##  Median :0.00000   3:17  
##  Mean   :0.05701   5:13  
##  3rd Qu.:0.10000   6: 9  
##  Max.   :0.51000   7:29
library(psych)
# Extended descriptive statistics (sd, trimmed mean, MAD, skew, kurtosis, se) per column.
# NOTE(review): Type is a factor; the starred row in the output appears to summarise
# its integer level codes, so those statistics are not meaningful -- confirm.
describe(Glass)
##       vars   n  mean   sd median trimmed  mad   min   max range  skew
## RI       1 214  1.52 0.00   1.52    1.52 0.00  1.51  1.53  0.02  1.60
## Na       2 214 13.41 0.82  13.30   13.38 0.64 10.73 17.38  6.65  0.45
## Mg       3 214  2.68 1.44   3.48    2.87 0.30  0.00  4.49  4.49 -1.14
## Al       4 214  1.44 0.50   1.36    1.41 0.31  0.29  3.50  3.21  0.89
## Si       5 214 72.65 0.77  72.79   72.71 0.57 69.81 75.41  5.60 -0.72
## K        6 214  0.50 0.65   0.56    0.43 0.17  0.00  6.21  6.21  6.46
## Ca       7 214  8.96 1.42   8.60    8.74 0.66  5.43 16.19 10.76  2.02
## Ba       8 214  0.18 0.50   0.00    0.03 0.00  0.00  3.15  3.15  3.37
## Fe       9 214  0.06 0.10   0.00    0.04 0.00  0.00  0.51  0.51  1.73
## Type*   10 214  2.54 1.71   2.00    2.31 1.48  1.00  6.00  5.00  1.04
##       kurtosis   se
## RI        4.72 0.00
## Na        2.90 0.06
## Mg       -0.45 0.10
## Al        1.94 0.03
## Si        2.82 0.05
## K        52.87 0.04
## Ca        6.41 0.10
## Ba       12.08 0.03
## Fe        2.52 0.01
## Type*    -0.29 0.12
library(ggplot2)
library(reshape2)
# Reshape to long format (one row per sample/predictor pair, keyed by Type) so
# that every predictor gets its own histogram panel. Each panel has free axes
# because the predictors are on very different ranges.
# `scales` is spelled out in full: the previous `scale =` only worked through
# partial argument matching.
ggplot(melt(Glass, id.vars = "Type"), aes(x = value)) +
  geom_histogram(bins = 50) +
  facet_wrap(~variable, scales = "free")

library(corrplot)
# Correlation matrix of the nine predictors (columns 1-9; column 10 is Type),
# with the variables reordered by hierarchical clustering.
corrplot(cor(Glass[,1:9]), order = "hclust")

library(caret)
# Pairwise correlations of the predictors (column 10, Type, is excluded).
corelation <- cor(Glass[,-10])
# NOTE(review): findCorrelation() returns the indices of columns it suggests removing
# to reduce pairwise correlation, so the count printed below is the number of flagged
# columns, not the number of correlated pairs -- confirm against the caret docs.
highCorr <- findCorrelation(corelation, cutoff = .75)
print(paste0("Total number of Predictor Variables with Pearson Correlation > 0.75: ",length(highCorr)))
## [1] "Total number of Predictor Variables with Pearson Correlation > 0.75: 1"
# Look at the Ca/RI pair directly.
cor(Glass[,c('Ca','RI')])
##           Ca        RI
## Ca 1.0000000 0.8104027
## RI 0.8104027 1.0000000

Also, there is a strong positive correlation between Ca and RI (Pearson r ≈ 0.81).

Do there appear to be any outliers in the data? Are any predictors skewed?

# Print, for every predictor, the values that the boxplot rule flags as outliers.
# The predictors are derived from the data (every column except the Type
# outcome) instead of a hard-coded 1:9 index range, so the loop stays correct
# if columns are added, dropped, or reordered.
for (predictor in setdiff(names(Glass), "Type")) {
  print(paste0("Predictor Variable (outlier values): ", predictor))
  # plot = FALSE returns the boxplot statistics without drawing; $out holds
  # the flagged values, and paste0() prints them as character strings.
  print(paste0(boxplot(Glass[predictor], plot = FALSE)$out))
}
## [1] "Predictor Variable (outlier values): RI"
##  [1] "1.52667" "1.5232"  "1.51215" "1.52725" "1.5241"  "1.52475" "1.53125"
##  [8] "1.53393" "1.52664" "1.52739" "1.52777" "1.52614" "1.52369" "1.51115"
## [15] "1.51131" "1.52315" "1.52365"
## [1] "Predictor Variable (outlier values): Na"
## [1] "11.45" "10.73" "11.23" "11.02" "11.03" "17.38" "15.79"
## [1] "Predictor Variable (outlier values): Mg"
## character(0)
## [1] "Predictor Variable (outlier values): Al"
##  [1] "0.29" "0.47" "0.47" "0.51" "3.5"  "3.04" "3.02" "0.34" "2.38" "2.79"
## [11] "2.68" "2.54" "2.34" "2.66" "2.51" "2.42" "2.74" "2.88"
## [1] "Predictor Variable (outlier values): Si"
##  [1] "70.57" "69.81" "70.16" "74.45" "69.89" "70.48" "70.7"  "74.55"
##  [9] "75.41" "70.26" "70.43" "75.18"
## [1] "Predictor Variable (outlier values): K"
## [1] "1.68" "6.21" "6.21" "1.76" "1.46" "2.7"  "1.41"
## [1] "Predictor Variable (outlier values): Ca"
##  [1] "11.64" "10.79" "13.24" "13.3"  "16.19" "11.52" "10.99" "14.68"
##  [9] "14.96" "14.4"  "11.14" "13.44" "5.87"  "11.41" "11.62" "11.53"
## [17] "11.32" "12.24" "12.5"  "11.27" "10.88" "11.22" "6.65"  "5.43" 
## [25] "5.79"  "6.47" 
## [1] "Predictor Variable (outlier values): Ba"
##  [1] "0.09" "0.11" "0.69" "0.14" "0.11" "3.15" "0.27" "0.09" "0.06" "0.15"
## [11] "2.2"  "0.24" "1.19" "1.63" "1.68" "0.76" "0.64" "0.4"  "1.59" "1.57"
## [21] "0.61" "0.81" "0.66" "0.64" "0.53" "0.63" "0.56" "1.71" "0.67" "1.55"
## [31] "1.38" "2.88" "0.54" "1.06" "1.59" "1.64" "1.57" "1.67"
## [1] "Predictor Variable (outlier values): Fe"
##  [1] "0.26" "0.3"  "0.31" "0.32" "0.34" "0.28" "0.29" "0.28" "0.35" "0.37"
## [11] "0.51" "0.28"

Yes, there are outliers in the predictor variables: the boxplot rule flags values in every predictor except Mg.

library(e1071)
# Sample skewness of each predictor (the Type column, column 10, is dropped).
vapply(Glass[, -10], skewness, numeric(1))
##         RI         Na         Mg         Al         Si          K 
##  1.6027151  0.4478343 -1.1364523  0.8946104 -0.7202392  6.4600889 
##         Ca         Ba         Fe 
##  2.0184463  3.3686800  1.7298107

Several predictors are also strongly skewed (most notably K, Ba, and Ca, each with skewness above 2), so transformations should be considered.

Are there any relevant transformations of one or more predictors that might improve the classification model?

# Estimate a Box-Cox transformation separately for every predictor
# (Type, column 10, is excluded); printing the list shows each result.
transform_glass <- lapply(Glass[, -10], BoxCoxTrans)
transform_glass
## $RI
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.511   1.517   1.518   1.518   1.519   1.534 
## 
## Largest/Smallest: 1.02 
## Sample Skewness: 1.6 
## 
## Estimated Lambda: -2 
## 
## 
## $Na
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.73   12.91   13.30   13.41   13.82   17.38 
## 
## Largest/Smallest: 1.62 
## Sample Skewness: 0.448 
## 
## Estimated Lambda: -0.1 
## With fudge factor, Lambda = 0 will be used for transformations
## 
## 
## $Mg
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.115   3.480   2.685   3.600   4.490 
## 
## Lambda could not be estimated; no transformation is applied
## 
## 
## $Al
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.290   1.190   1.360   1.445   1.630   3.500 
## 
## Largest/Smallest: 12.1 
## Sample Skewness: 0.895 
## 
## Estimated Lambda: 0.5 
## 
## 
## $Si
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   69.81   72.28   72.79   72.65   73.09   75.41 
## 
## Largest/Smallest: 1.08 
## Sample Skewness: -0.72 
## 
## Estimated Lambda: 2 
## 
## 
## $K
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1225  0.5550  0.4971  0.6100  6.2100 
## 
## Lambda could not be estimated; no transformation is applied
## 
## 
## $Ca
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.430   8.240   8.600   8.957   9.172  16.190 
## 
## Largest/Smallest: 2.98 
## Sample Skewness: 2.02 
## 
## Estimated Lambda: -1.1 
## 
## 
## $Ba
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.175   0.000   3.150 
## 
## Lambda could not be estimated; no transformation is applied
## 
## 
## $Fe
## Box-Cox Transformation
## 
## 214 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.05701 0.10000 0.51000 
## 
## Lambda could not be estimated; no transformation is applied

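Yes, transformation techniques such as the log or Box-Cox transformation could help improve the classification model. Reducing skewness also pulls in extreme values, which lessens the influence of outliers and can improve the model’s performance as well. Note that Box-Cox requires strictly positive values, so no lambda could be estimated for Mg, K, Ba, and Fe, which all contain zeros.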

  1. The soybean data can also be found at the UC Irvine Machine Learning Repository. Data were collected to predict disease in 683 soybeans. The 35 predictors are mostly categorical and include information on the environmental conditions (e.g., temperature, precipitation) and plant conditions (e.g., leaf spots, mold growth). The outcome labels consist of 19 distinct classes. The data can be loaded via:
# Load the Soybean data set into the workspace.
data(Soybean)
## See ?Soybean for details

Description: There are 19 classes, only the first 15 of which have been used in prior work. The folklore seems to be that the last four classes are unjustified by the data since they have so few examples. There are 35 categorical attributes, some nominal and some ordered. The value “dna” means does not apply. The values for attributes are encoded numerically, with the first value encoded as “0,” the second as “1,” and so forth.

A data frame with 683 observations on 36 variables. There are 35 categorical attributes, all numerical and a nominal denoting the class.

Investigate the frequency distributions for the categorical predictors. Are any of the distributions degenerate in the ways discussed earlier in this chapter?

# Preview the first six observations as a formatted table.
kable(head(Soybean))
Class date plant.stand precip temp hail crop.hist area.dam sever seed.tmt germ plant.growth leaves leaf.halo leaf.marg leaf.size leaf.shread leaf.malf leaf.mild stem lodging stem.cankers canker.lesion fruiting.bodies ext.decay mycelium int.discolor sclerotia fruit.pods fruit.spots seed mold.growth seed.discolor seed.size shriveling roots
diaporthe-stem-canker 6 0 2 1 0 1 1 1 0 0 1 1 0 2 2 0 0 0 1 1 3 1 1 1 0 0 0 0 4 0 0 0 0 0 0
diaporthe-stem-canker 4 0 2 1 0 2 0 2 1 1 1 1 0 2 2 0 0 0 1 0 3 1 1 1 0 0 0 0 4 0 0 0 0 0 0
diaporthe-stem-canker 3 0 2 1 0 1 0 2 1 2 1 1 0 2 2 0 0 0 1 0 3 0 1 1 0 0 0 0 4 0 0 0 0 0 0
diaporthe-stem-canker 3 0 2 1 0 1 0 2 0 1 1 1 0 2 2 0 0 0 1 0 3 0 1 1 0 0 0 0 4 0 0 0 0 0 0
diaporthe-stem-canker 6 0 2 1 0 2 0 1 0 2 1 1 0 2 2 0 0 0 1 0 3 1 1 1 0 0 0 0 4 0 0 0 0 0 0
diaporthe-stem-canker 5 0 2 1 0 3 0 1 0 1 1 1 0 2 2 0 0 0 1 0 3 0 1 1 0 0 0 0 4 0 0 0 0 0 0
# Frequency distribution of every categorical predictor, one panel per predictor.
# geom_bar() counts the observations at each level. It replaces
# geom_histogram(stat = "count"), which draws the same bars but warns about
# ignored binning parameters. `scales` is spelled out in full instead of relying
# on partial argument matching.
ggplot(melt(Soybean, id.vars = "Class"), aes(x = value)) +
  geom_bar() +
  facet_wrap(~variable, scales = "free")

Let’s identify the near-zero variance predictors using the nearZeroVar() function from the caret package.

# Column indices of the predictors that caret flags as near-zero variance.
nearZeroVar(Soybean)
## [1] 19 26 28

So, the predictors with degenerate (near-zero variance) distributions are columns 19, 26, and 28: leaf.mild, mycelium, and sclerotia.

Roughly 18% of the data are missing. Are there particular predictors that are more likely to be missing? Is the pattern of missing data related to the classes?

# Number of missing values in each column of Soybean.
Soybean.na <- vapply(Soybean, function(column) sum(is.na(column)), integer(1))
Soybean.na
##           Class            date     plant.stand          precip 
##               0               1              36              38 
##            temp            hail       crop.hist        area.dam 
##              30             121              16               1 
##           sever        seed.tmt            germ    plant.growth 
##             121             121             112              16 
##          leaves       leaf.halo       leaf.marg       leaf.size 
##               0              84              84              84 
##     leaf.shread       leaf.malf       leaf.mild            stem 
##             100              84             108              16 
##         lodging    stem.cankers   canker.lesion fruiting.bodies 
##             121              38              38             106 
##       ext.decay        mycelium    int.discolor       sclerotia 
##              38              38              38              38 
##      fruit.pods     fruit.spots            seed     mold.growth 
##              84             106              92              92 
##   seed.discolor       seed.size      shriveling           roots 
##             106              92             106              31
# Count the missing values in each observation and pair every count with
# that observation's disease class.
number.of.na <- as.integer(rowSums(is.na(Soybean)))
class.soybean <- Soybean[["Class"]]
soybean.na.df <- data.frame(class.soybean, number.of.na)
kable(head(soybean.na.df, n = 10))
class.soybean number.of.na
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
diaporthe-stem-canker 0
# Total the per-observation missing counts within each class, then list the
# classes from the fewest to the most missing values.
results <- with(
  soybean.na.df,
  aggregate(number.of.na, by = list(class.soybean = class.soybean), FUN = sum)
)
kable(results[order(results$x), ])
class.soybean x
2 alternarialeaf-spot 0
3 anthracnose 0
4 bacterial-blight 0
5 bacterial-pustule 0
6 brown-spot 0
7 brown-stem-rot 0
8 charcoal-rot 0
11 diaporthe-stem-canker 0
12 downy-mildew 0
13 frog-eye-leaf-spot 0
15 phyllosticta-leaf-spot 0
17 powdery-mildew 0
18 purple-seed-stain 0
19 rhizoctonia-root-rot 0
14 herbicide-injury 160
10 diaporthe-pod-&-stem-blight 177
9 cyst-nematode 336
1 2-4-d-injury 450
16 phytophthora-rot 1214

Develop a strategy for handling missing data, either by eliminating predictors or imputation.

# Show the first ten phytophthora-rot observations to see where their NAs fall.
head(subset(Soybean, Class == "phytophthora-rot"), n = 10)
##               Class date plant.stand precip temp hail crop.hist area.dam
## 31 phytophthora-rot    0           1      2    1    1         1        1
## 32 phytophthora-rot    1           1      2    1 <NA>         3        1
## 33 phytophthora-rot    2           1      2    2 <NA>         2        1
## 34 phytophthora-rot    1           1      2    0    0         2        1
## 35 phytophthora-rot    2           1      2    2 <NA>         2        1
## 36 phytophthora-rot    3           1      2    1 <NA>         2        1
## 37 phytophthora-rot    0           1      1    1    0         1        1
## 38 phytophthora-rot    3           1      2    0    0         2        1
## 39 phytophthora-rot    2           1      1    1 <NA>         0        1
## 40 phytophthora-rot    2           1      2    0    0         1        1
##    sever seed.tmt germ plant.growth leaves leaf.halo leaf.marg leaf.size
## 31     1        0    0            1      1         0         2         2
## 32  <NA>     <NA> <NA>            1      1         0         2         2
## 33  <NA>     <NA> <NA>            1      1      <NA>      <NA>      <NA>
## 34     2        1    1            1      1         0         2         2
## 35  <NA>     <NA> <NA>            1      1      <NA>      <NA>      <NA>
## 36  <NA>     <NA> <NA>            1      1      <NA>      <NA>      <NA>
## 37     1        0    0            1      1         0         2         2
## 38     2        1    1            1      1         0         2         2
## 39  <NA>     <NA> <NA>            1      1         0         2         2
## 40     2        0    1            1      1         0         2         2
##    leaf.shread leaf.malf leaf.mild stem lodging stem.cankers canker.lesion
## 31           0         0         0    1       0            1             2
## 32           0         0         0    1    <NA>            2             2
## 33        <NA>      <NA>      <NA>    1    <NA>            3             2
## 34           0         0         0    1       0            2             2
## 35        <NA>      <NA>      <NA>    1    <NA>            2             2
## 36        <NA>      <NA>      <NA>    1    <NA>            3             2
## 37           0         0         0    1       0            1             2
## 38           0         0         0    1       0            2             2
## 39           0         0         0    1    <NA>            2             2
## 40           0         0         0    1       0            1             2
##    fruiting.bodies ext.decay mycelium int.discolor sclerotia fruit.pods
## 31               0         1        0            0         0          3
## 32            <NA>         0        0            0         0       <NA>
## 33            <NA>         0        0            0         0       <NA>
## 34               0         0        0            0         0          3
## 35            <NA>         0        0            0         0       <NA>
## 36            <NA>         0        0            0         0       <NA>
## 37               0         0        0            0         0          3
## 38               0         0        0            0         0          3
## 39            <NA>         0        0            0         0       <NA>
## 40               0         0        0            0         0          3
##    fruit.spots seed mold.growth seed.discolor seed.size shriveling roots
## 31           4    0           0             0         0          0     0
## 32        <NA> <NA>        <NA>          <NA>      <NA>       <NA>     1
## 33        <NA> <NA>        <NA>          <NA>      <NA>       <NA>     1
## 34           4    0           0             0         0          0     0
## 35        <NA> <NA>        <NA>          <NA>      <NA>       <NA>     1
## 36        <NA> <NA>        <NA>          <NA>      <NA>       <NA>     1
## 37           4    0           0             0         0          0     0
## 38           4    0           0             0         0          0     0
## 39        <NA> <NA>        <NA>          <NA>      <NA>       <NA>     1
## 40           4    0           0             0         0          0     0

(reference : page 54) To impute missing values, the impute package has a function, impute.knn, that uses K-nearest neighbors to estimate the missing data. The previously mentioned preProcess function applies imputation methods based on K-nearest neighbors or bagged trees.

# reference : https://www.rdocumentation.org/packages/DMwR/versions/0.4.1/topics/knnImputation
# Impute every missing value with k-nearest-neighbour imputation (k = 10).
# The whole data frame, including the Class outcome column, is passed to the imputer.
# NOTE(review): DMwR appears to have been archived on CRAN; confirm it is still
# installable (or switch to a maintained successor) before re-running this chunk.
library(DMwR)
imputed.data <- knnImputation(Soybean,k=10)
head(imputed.data,10)
##                    Class date plant.stand precip temp hail crop.hist
## 1  diaporthe-stem-canker    6           0      2    1    0         1
## 2  diaporthe-stem-canker    4           0      2    1    0         2
## 3  diaporthe-stem-canker    3           0      2    1    0         1
## 4  diaporthe-stem-canker    3           0      2    1    0         1
## 5  diaporthe-stem-canker    6           0      2    1    0         2
## 6  diaporthe-stem-canker    5           0      2    1    0         3
## 7  diaporthe-stem-canker    5           0      2    1    0         2
## 8  diaporthe-stem-canker    4           0      2    1    1         1
## 9  diaporthe-stem-canker    6           0      2    1    0         3
## 10 diaporthe-stem-canker    4           0      2    1    0         2
##    area.dam sever seed.tmt germ plant.growth leaves leaf.halo leaf.marg
## 1         1     1        0    0            1      1         0         2
## 2         0     2        1    1            1      1         0         2
## 3         0     2        1    2            1      1         0         2
## 4         0     2        0    1            1      1         0         2
## 5         0     1        0    2            1      1         0         2
## 6         0     1        0    1            1      1         0         2
## 7         0     1        1    0            1      1         0         2
## 8         0     1        0    2            1      1         0         2
## 9         0     1        1    1            1      1         0         2
## 10        0     2        0    2            1      1         0         2
##    leaf.size leaf.shread leaf.malf leaf.mild stem lodging stem.cankers
## 1          2           0         0         0    1       1            3
## 2          2           0         0         0    1       0            3
## 3          2           0         0         0    1       0            3
## 4          2           0         0         0    1       0            3
## 5          2           0         0         0    1       0            3
## 6          2           0         0         0    1       0            3
## 7          2           0         0         0    1       1            3
## 8          2           0         0         0    1       0            3
## 9          2           0         0         0    1       0            3
## 10         2           0         0         0    1       0            3
##    canker.lesion fruiting.bodies ext.decay mycelium int.discolor sclerotia
## 1              1               1         1        0            0         0
## 2              1               1         1        0            0         0
## 3              0               1         1        0            0         0
## 4              0               1         1        0            0         0
## 5              1               1         1        0            0         0
## 6              0               1         1        0            0         0
## 7              1               1         1        0            0         0
## 8              1               1         1        0            0         0
## 9              1               1         1        0            0         0
## 10             1               1         1        0            0         0
##    fruit.pods fruit.spots seed mold.growth seed.discolor seed.size
## 1           0           4    0           0             0         0
## 2           0           4    0           0             0         0
## 3           0           4    0           0             0         0
## 4           0           4    0           0             0         0
## 5           0           4    0           0             0         0
## 6           0           4    0           0             0         0
## 7           0           4    0           0             0         0
## 8           0           4    0           0             0         0
## 9           0           4    0           0             0         0
## 10          0           4    0           0             0         0
##    shriveling roots
## 1           0     0
## 2           0     0
## 3           0     0
## 4           0     0
## 5           0     0
## 6           0     0
## 7           0     0
## 8           0     0
## 9           0     0
## 10          0     0
# Verify that the imputed data contain no remaining missing values.
anyNA(imputed.data)
## [1] FALSE

Reference: