MCAR and MAR observations are called ignorable, while MNAR values are called non-ignorable or informative.

Imputing Missing Data with MICE

MICE (multivariate imputation by chained equations) works as follows. First, fill in every variable that has missing values with a placeholder such as its mean. Then run a regression for the first variable on the other variables to obtain its imputed values; next, run a regression for the second variable, using the first variable’s updated values, and impute it; repeat these steps until the missing data in every variable have been imputed by regression. That is the first iteration. Repeat the process for 50 iterations to obtain the first imputed dataset, and repeat the whole procedure 5 times to obtain 5 imputed datasets.

library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Work on a copy of the built-in airquality data set. The summary printed
# below shows NA's only in Ozone (37) and Solar.R (7).
data <- airquality
summary(data)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Tabulate the missing-data patterns (1 = observed, 0 = missing). Per the
# table below: 111 complete rows, 35 missing only Ozone, 5 missing only
# Solar.R, and 2 missing both (44 missing cells in total).
md.pattern(data)

##     Wind Temp Month Day Solar.R Ozone   
## 111    1    1     1   1       1     1  0
## 35     1    1     1   1       1     0  1
## 5      1    1     1   1       0     1  1
## 2      1    1     1   1       0     0  2
##        0    0     0   0       7    37 44
# Baseline: histogram of Ozone before any imputation.
hist(data$Ozone)

# Naive single-value imputations of Ozone: every NA is replaced by the same
# constant (zero, the observed mean, or the observed median).
imputed_zero <- replace(data$Ozone, is.na(data$Ozone), 0)
imputed_mean <- replace(
  data$Ozone, is.na(data$Ozone), mean(data$Ozone, na.rm = TRUE)
)
imputed_median <- replace(
  data$Ozone, is.na(data$Ozone), median(data$Ozone, na.rm = TRUE)
)
hist(imputed_zero)

hist(imputed_mean)

hist(imputed_median)

# Impute with predictive mean matching ("pmm"). The log below shows the mice
# defaults in effect: 5 imputations x 5 iterations. complete() returns the
# first imputed data set (action = 1L, its default); keep only Ozone.
imputed_pmm <- complete(
  mice(data, method = "pmm", seed = 500),
  action = 1L
)$Ozone
## 
##  iter imp variable
##   1   1  Ozone  Solar.R
##   1   2  Ozone  Solar.R
##   1   3  Ozone  Solar.R
##   1   4  Ozone  Solar.R
##   1   5  Ozone  Solar.R
##   2   1  Ozone  Solar.R
##   2   2  Ozone  Solar.R
##   2   3  Ozone  Solar.R
##   2   4  Ozone  Solar.R
##   2   5  Ozone  Solar.R
##   3   1  Ozone  Solar.R
##   3   2  Ozone  Solar.R
##   3   3  Ozone  Solar.R
##   3   4  Ozone  Solar.R
##   3   5  Ozone  Solar.R
##   4   1  Ozone  Solar.R
##   4   2  Ozone  Solar.R
##   4   3  Ozone  Solar.R
##   4   4  Ozone  Solar.R
##   4   5  Ozone  Solar.R
##   5   1  Ozone  Solar.R
##   5   2  Ozone  Solar.R
##   5   3  Ozone  Solar.R
##   5   4  Ozone  Solar.R
##   5   5  Ozone  Solar.R
# Impute with classification and regression trees ("cart"), using the same
# seed as the other methods so the runs are comparable. Keep only Ozone from
# the first imputed data set (action = 1L is the complete() default).
imputed_cart <- complete(
  mice(data, method = "cart", seed = 500),
  action = 1L
)$Ozone
## 
##  iter imp variable
##   1   1  Ozone  Solar.R
##   1   2  Ozone  Solar.R
##   1   3  Ozone  Solar.R
##   1   4  Ozone  Solar.R
##   1   5  Ozone  Solar.R
##   2   1  Ozone  Solar.R
##   2   2  Ozone  Solar.R
##   2   3  Ozone  Solar.R
##   2   4  Ozone  Solar.R
##   2   5  Ozone  Solar.R
##   3   1  Ozone  Solar.R
##   3   2  Ozone  Solar.R
##   3   3  Ozone  Solar.R
##   3   4  Ozone  Solar.R
##   3   5  Ozone  Solar.R
##   4   1  Ozone  Solar.R
##   4   2  Ozone  Solar.R
##   4   3  Ozone  Solar.R
##   4   4  Ozone  Solar.R
##   4   5  Ozone  Solar.R
##   5   1  Ozone  Solar.R
##   5   2  Ozone  Solar.R
##   5   3  Ozone  Solar.R
##   5   4  Ozone  Solar.R
##   5   5  Ozone  Solar.R
# Impute with mice's "lasso.norm" method (lasso-based linear regression),
# using the same seed as the other methods. Keep only Ozone from the first
# imputed data set (action = 1L is the complete() default).
imputed_lasso <- complete(
  mice(data, method = "lasso.norm", seed = 500),
  action = 1L
)$Ozone
## 
##  iter imp variable
##   1   1  Ozone  Solar.R
##   1   2  Ozone  Solar.R
##   1   3  Ozone  Solar.R
##   1   4  Ozone  Solar.R
##   1   5  Ozone  Solar.R
##   2   1  Ozone  Solar.R
##   2   2  Ozone  Solar.R
##   2   3  Ozone  Solar.R
##   2   4  Ozone  Solar.R
##   2   5  Ozone  Solar.R
##   3   1  Ozone  Solar.R
##   3   2  Ozone  Solar.R
##   3   3  Ozone  Solar.R
##   3   4  Ozone  Solar.R
##   3   5  Ozone  Solar.R
##   4   1  Ozone  Solar.R
##   4   2  Ozone  Solar.R
##   4   3  Ozone  Solar.R
##   4   4  Ozone  Solar.R
##   4   5  Ozone  Solar.R
##   5   1  Ozone  Solar.R
##   5   2  Ozone  Solar.R
##   5   3  Ozone  Solar.R
##   5   4  Ozone  Solar.R
##   5   5  Ozone  Solar.R

The lasso result is different from the other two (pmm and cart).

# Compare the original Ozone histogram with the histogram of each
# mice-imputed version (pmm, cart, lasso.norm).
hist(data$Ozone)

hist(imputed_pmm)

hist(imputed_cart)

hist(imputed_lasso)

# Multiple imputation with predictive mean matching:
#   m     = 5   number of imputed data sets
#   maxit = 50  number of iterations per imputed data set
#   seed  = 500 fixed seed so the imputations are reproducible
# The argument is spelled out as `method` rather than relying on partial
# matching of `meth`.
tempData <- mice(data, m = 5, maxit = 50, method = "pmm", seed = 500)
## 
##  iter imp variable
##   1   1  Ozone  Solar.R
##   1   2  Ozone  Solar.R
##   1   3  Ozone  Solar.R
##   1   4  Ozone  Solar.R
##   1   5  Ozone  Solar.R
##   2   1  Ozone  Solar.R
##   2   2  Ozone  Solar.R
##   2   3  Ozone  Solar.R
##   2   4  Ozone  Solar.R
##   2   5  Ozone  Solar.R
##   3   1  Ozone  Solar.R
##   3   2  Ozone  Solar.R
##   3   3  Ozone  Solar.R
##   3   4  Ozone  Solar.R
##   3   5  Ozone  Solar.R
##   4   1  Ozone  Solar.R
##   4   2  Ozone  Solar.R
##   4   3  Ozone  Solar.R
##   4   4  Ozone  Solar.R
##   4   5  Ozone  Solar.R
##   5   1  Ozone  Solar.R
##   5   2  Ozone  Solar.R
##   5   3  Ozone  Solar.R
##   5   4  Ozone  Solar.R
##   5   5  Ozone  Solar.R
##   6   1  Ozone  Solar.R
##   6   2  Ozone  Solar.R
##   6   3  Ozone  Solar.R
##   6   4  Ozone  Solar.R
##   6   5  Ozone  Solar.R
##   7   1  Ozone  Solar.R
##   7   2  Ozone  Solar.R
##   7   3  Ozone  Solar.R
##   7   4  Ozone  Solar.R
##   7   5  Ozone  Solar.R
##   8   1  Ozone  Solar.R
##   8   2  Ozone  Solar.R
##   8   3  Ozone  Solar.R
##   8   4  Ozone  Solar.R
##   8   5  Ozone  Solar.R
##   9   1  Ozone  Solar.R
##   9   2  Ozone  Solar.R
##   9   3  Ozone  Solar.R
##   9   4  Ozone  Solar.R
##   9   5  Ozone  Solar.R
##   10   1  Ozone  Solar.R
##   10   2  Ozone  Solar.R
##   10   3  Ozone  Solar.R
##   10   4  Ozone  Solar.R
##   10   5  Ozone  Solar.R
##   11   1  Ozone  Solar.R
##   11   2  Ozone  Solar.R
##   11   3  Ozone  Solar.R
##   11   4  Ozone  Solar.R
##   11   5  Ozone  Solar.R
##   12   1  Ozone  Solar.R
##   12   2  Ozone  Solar.R
##   12   3  Ozone  Solar.R
##   12   4  Ozone  Solar.R
##   12   5  Ozone  Solar.R
##   13   1  Ozone  Solar.R
##   13   2  Ozone  Solar.R
##   13   3  Ozone  Solar.R
##   13   4  Ozone  Solar.R
##   13   5  Ozone  Solar.R
##   14   1  Ozone  Solar.R
##   14   2  Ozone  Solar.R
##   14   3  Ozone  Solar.R
##   14   4  Ozone  Solar.R
##   14   5  Ozone  Solar.R
##   15   1  Ozone  Solar.R
##   15   2  Ozone  Solar.R
##   15   3  Ozone  Solar.R
##   15   4  Ozone  Solar.R
##   15   5  Ozone  Solar.R
##   16   1  Ozone  Solar.R
##   16   2  Ozone  Solar.R
##   16   3  Ozone  Solar.R
##   16   4  Ozone  Solar.R
##   16   5  Ozone  Solar.R
##   17   1  Ozone  Solar.R
##   17   2  Ozone  Solar.R
##   17   3  Ozone  Solar.R
##   17   4  Ozone  Solar.R
##   17   5  Ozone  Solar.R
##   18   1  Ozone  Solar.R
##   18   2  Ozone  Solar.R
##   18   3  Ozone  Solar.R
##   18   4  Ozone  Solar.R
##   18   5  Ozone  Solar.R
##   19   1  Ozone  Solar.R
##   19   2  Ozone  Solar.R
##   19   3  Ozone  Solar.R
##   19   4  Ozone  Solar.R
##   19   5  Ozone  Solar.R
##   20   1  Ozone  Solar.R
##   20   2  Ozone  Solar.R
##   20   3  Ozone  Solar.R
##   20   4  Ozone  Solar.R
##   20   5  Ozone  Solar.R
##   21   1  Ozone  Solar.R
##   21   2  Ozone  Solar.R
##   21   3  Ozone  Solar.R
##   21   4  Ozone  Solar.R
##   21   5  Ozone  Solar.R
##   22   1  Ozone  Solar.R
##   22   2  Ozone  Solar.R
##   22   3  Ozone  Solar.R
##   22   4  Ozone  Solar.R
##   22   5  Ozone  Solar.R
##   23   1  Ozone  Solar.R
##   23   2  Ozone  Solar.R
##   23   3  Ozone  Solar.R
##   23   4  Ozone  Solar.R
##   23   5  Ozone  Solar.R
##   24   1  Ozone  Solar.R
##   24   2  Ozone  Solar.R
##   24   3  Ozone  Solar.R
##   24   4  Ozone  Solar.R
##   24   5  Ozone  Solar.R
##   25   1  Ozone  Solar.R
##   25   2  Ozone  Solar.R
##   25   3  Ozone  Solar.R
##   25   4  Ozone  Solar.R
##   25   5  Ozone  Solar.R
##   26   1  Ozone  Solar.R
##   26   2  Ozone  Solar.R
##   26   3  Ozone  Solar.R
##   26   4  Ozone  Solar.R
##   26   5  Ozone  Solar.R
##   27   1  Ozone  Solar.R
##   27   2  Ozone  Solar.R
##   27   3  Ozone  Solar.R
##   27   4  Ozone  Solar.R
##   27   5  Ozone  Solar.R
##   28   1  Ozone  Solar.R
##   28   2  Ozone  Solar.R
##   28   3  Ozone  Solar.R
##   28   4  Ozone  Solar.R
##   28   5  Ozone  Solar.R
##   29   1  Ozone  Solar.R
##   29   2  Ozone  Solar.R
##   29   3  Ozone  Solar.R
##   29   4  Ozone  Solar.R
##   29   5  Ozone  Solar.R
##   30   1  Ozone  Solar.R
##   30   2  Ozone  Solar.R
##   30   3  Ozone  Solar.R
##   30   4  Ozone  Solar.R
##   30   5  Ozone  Solar.R
##   31   1  Ozone  Solar.R
##   31   2  Ozone  Solar.R
##   31   3  Ozone  Solar.R
##   31   4  Ozone  Solar.R
##   31   5  Ozone  Solar.R
##   32   1  Ozone  Solar.R
##   32   2  Ozone  Solar.R
##   32   3  Ozone  Solar.R
##   32   4  Ozone  Solar.R
##   32   5  Ozone  Solar.R
##   33   1  Ozone  Solar.R
##   33   2  Ozone  Solar.R
##   33   3  Ozone  Solar.R
##   33   4  Ozone  Solar.R
##   33   5  Ozone  Solar.R
##   34   1  Ozone  Solar.R
##   34   2  Ozone  Solar.R
##   34   3  Ozone  Solar.R
##   34   4  Ozone  Solar.R
##   34   5  Ozone  Solar.R
##   35   1  Ozone  Solar.R
##   35   2  Ozone  Solar.R
##   35   3  Ozone  Solar.R
##   35   4  Ozone  Solar.R
##   35   5  Ozone  Solar.R
##   36   1  Ozone  Solar.R
##   36   2  Ozone  Solar.R
##   36   3  Ozone  Solar.R
##   36   4  Ozone  Solar.R
##   36   5  Ozone  Solar.R
##   37   1  Ozone  Solar.R
##   37   2  Ozone  Solar.R
##   37   3  Ozone  Solar.R
##   37   4  Ozone  Solar.R
##   37   5  Ozone  Solar.R
##   38   1  Ozone  Solar.R
##   38   2  Ozone  Solar.R
##   38   3  Ozone  Solar.R
##   38   4  Ozone  Solar.R
##   38   5  Ozone  Solar.R
##   39   1  Ozone  Solar.R
##   39   2  Ozone  Solar.R
##   39   3  Ozone  Solar.R
##   39   4  Ozone  Solar.R
##   39   5  Ozone  Solar.R
##   40   1  Ozone  Solar.R
##   40   2  Ozone  Solar.R
##   40   3  Ozone  Solar.R
##   40   4  Ozone  Solar.R
##   40   5  Ozone  Solar.R
##   41   1  Ozone  Solar.R
##   41   2  Ozone  Solar.R
##   41   3  Ozone  Solar.R
##   41   4  Ozone  Solar.R
##   41   5  Ozone  Solar.R
##   42   1  Ozone  Solar.R
##   42   2  Ozone  Solar.R
##   42   3  Ozone  Solar.R
##   42   4  Ozone  Solar.R
##   42   5  Ozone  Solar.R
##   43   1  Ozone  Solar.R
##   43   2  Ozone  Solar.R
##   43   3  Ozone  Solar.R
##   43   4  Ozone  Solar.R
##   43   5  Ozone  Solar.R
##   44   1  Ozone  Solar.R
##   44   2  Ozone  Solar.R
##   44   3  Ozone  Solar.R
##   44   4  Ozone  Solar.R
##   44   5  Ozone  Solar.R
##   45   1  Ozone  Solar.R
##   45   2  Ozone  Solar.R
##   45   3  Ozone  Solar.R
##   45   4  Ozone  Solar.R
##   45   5  Ozone  Solar.R
##   46   1  Ozone  Solar.R
##   46   2  Ozone  Solar.R
##   46   3  Ozone  Solar.R
##   46   4  Ozone  Solar.R
##   46   5  Ozone  Solar.R
##   47   1  Ozone  Solar.R
##   47   2  Ozone  Solar.R
##   47   3  Ozone  Solar.R
##   47   4  Ozone  Solar.R
##   47   5  Ozone  Solar.R
##   48   1  Ozone  Solar.R
##   48   2  Ozone  Solar.R
##   48   3  Ozone  Solar.R
##   48   4  Ozone  Solar.R
##   48   5  Ozone  Solar.R
##   49   1  Ozone  Solar.R
##   49   2  Ozone  Solar.R
##   49   3  Ozone  Solar.R
##   49   4  Ozone  Solar.R
##   49   5  Ozone  Solar.R
##   50   1  Ozone  Solar.R
##   50   2  Ozone  Solar.R
##   50   3  Ozone  Solar.R
##   50   4  Ozone  Solar.R
##   50   5  Ozone  Solar.R
# Inspect the mids object: number of imputations, the method used per
# variable (only Ozone and Solar.R are imputed), and the predictor matrix.
summary(tempData)
## Class: mids
## Number of multiple imputations:  5 
## Imputation methods:
##   Ozone Solar.R    Wind    Temp   Month     Day 
##   "pmm"   "pmm"      ""      ""      ""      "" 
## PredictorMatrix:
##         Ozone Solar.R Wind Temp Month Day
## Ozone       0       1    1    1     1   1
## Solar.R     1       0    1    1     1   1
## Wind        1       1    0    1     1   1
## Temp        1       1    1    0     1   1
## Month       1       1    1    1     0   1
## Day         1       1    1    1     1   0
# Imputed Ozone values: one row per observation whose Ozone was missing
# (row names are the original row numbers), one column per imputation.
tempData$imp$Ozone
##      1   2   3  4   5
## 5   14   8   6 19   8
## 10  11  20  20 18  44
## 25   8  18  19 19  18
## 26  13  13  37 37  13
## 27  37  13  12 11  21
## 32  59  40  44 47  45
## 33  30  59  13 45  36
## 34  37  32  37 32   1
## 35  63  20  89 35  40
## 36  59  89  39 78  89
## 37  36  16  30 16  41
## 39  61  79  97 50  82
## 42  50 115  77 61  78
## 43  79  50  76 91  79
## 45  45  46  45 28  45
## 46  52  47  35 63  46
## 52  35  59  96 59  35
## 53  48  39  78 39  96
## 54  40  49 110 89  49
## 55  59  64 110 78  39
## 56  28  45  59 44  52
## 57  47  16  48 16  47
## 58  23  21  39 41  30
## 59  52  44  35 16  28
## 60  21  23  23  9  21
## 61  96  48  79 39 110
## 65  36  16  29 23  23
## 72  46  28  35 32  32
## 75  48  40  37 35  48
## 83  49  49  48 71  78
## 84  35  59  59 49  71
## 102 61  85  82 61  82
## 103 28  31  20 45  35
## 107 41  14  16 21  23
## 115 36  13  20 16  23
## 119 66  61  97 79 122
## 150 41  21  36 21  11
# Extract the first of the five completed data sets from the mids object.
completedData <- complete(tempData, action = 1L)
# completed <- complete(tempData)

# Scatterplots of Ozone against Wind, Temp and Solar.R drawn from the mids
# object. NOTE(review): the original author marked this call with "#?" --
# confirm that this is the intended diagnostic plot.
xyplot(tempData, Ozone ~ Wind + Temp + Solar.R, pch = 18, cex = 1)

# Density plots for the imputed variables, used to compare the imputed
# values with the observed ones.
densityplot(tempData)

# Fit the same linear model (Temp on Ozone, Solar.R and Wind) to each of the
# imputed data sets, then pool the per-imputation fits into one summary table.
modelFit1 <- with(tempData, lm(Temp ~ Ozone + Solar.R + Wind))
summary(pool(modelFit1))
##          term     estimate   std.error  statistic        df      p.value
## 1 (Intercept) 72.401478102 2.901648707 24.9518413 107.46161 1.620476e-46
## 2       Ozone  0.172964671 0.026685861  6.4815099  59.50153 1.963992e-08
## 3     Solar.R  0.007473364 0.007686916  0.9722187  47.48152 3.358687e-01
## 4        Wind -0.329208428 0.207845499 -1.5839093 131.96400 1.156080e-01

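Logistic regression can be used to identify whether the data are missing completely at random: regress an indicator of missingness on the observed variables; if any of them predicts missingness, the data are not MCAR.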

How well is your imputation working? (1) Remove all rows with missing values; (2) run statistics on the dataset from step 1; (3) randomly remove values from your dataset; (4) impute the removed values, calculate the same statistics, and compare them with those from step 2; (5) repeat steps 3 and 4 with many different sets of removed values and different imputation methods.