This demo shows how to handle missing data in R.
# Working with `airquality` dataset
# Take a working copy of the built-in `airquality` data and preview its first
# six rows.
df <- airquality
head(df, n = 6L)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
Visualizing the percentage of non-NA and NA values in the dataset
library(VIM)
## Warning: package 'VIM' was built under R version 3.2.5
## Loading required package: colorspace
## Warning: package 'colorspace' was built under R version 3.2.5
## Loading required package: grid
## Loading required package: data.table
## Warning: package 'data.table' was built under R version 3.2.5
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(mice)
## Warning: package 'mice' was built under R version 3.2.5
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.2.5
## mice 2.25 2015-11-09
# Tabulate the missing-data patterns. In the printed table each row is one
# pattern (1 = observed, 0 = missing): the leading number is how many rows of
# the data share that pattern, the last column is how many variables are
# missing in it, and the bottom row gives the NA count per variable.
md.pattern(x = df)
## Wind Temp Month Day Solar.R Ozone
## 111 1 1 1 1 1 1 0
## 35 1 1 1 1 1 0 1
## 5 1 1 1 1 0 1 1
## 2 1 1 1 1 0 0 2
## 0 0 0 0 7 37 44
# Plot the amount of missing data per variable alongside the combinations of
# missingness, and keep the returned summary so it can be printed afterwards.
missing_value <- aggr(
  df,
  col = c(2, 4),
  numbers = TRUE,
  # The option is spelled `sortVars`; it asks for the variables to be ordered
  # by their number of missing values.
  sortVars = TRUE,
  labels = names(df),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("missing data", "pattern"),
  axes = TRUE
)
# Print the per-variable missing counts.
missing_value
##
## Missings in variables:
## Variable Count
## Ozone 37
## Solar.R 7
Imputation of missing data using predictive mean matching (PMM)
library(mice)
# Build 5 imputed data sets with the "pmm" method, running 50 iterations for
# each; the fixed seed keeps the random draws repeatable between runs.
df_im <- mice(
  data = df,
  m = 5,
  maxit = 50,
  method = "pmm",
  seed = 500
)
##
## iter imp variable
## 1 1 Ozone Solar.R
## 1 2 Ozone Solar.R
## 1 3 Ozone Solar.R
## 1 4 Ozone Solar.R
## 1 5 Ozone Solar.R
## 2 1 Ozone Solar.R
## 2 2 Ozone Solar.R
## 2 3 Ozone Solar.R
## 2 4 Ozone Solar.R
## 2 5 Ozone Solar.R
## 3 1 Ozone Solar.R
## 3 2 Ozone Solar.R
## 3 3 Ozone Solar.R
## 3 4 Ozone Solar.R
## 3 5 Ozone Solar.R
## 4 1 Ozone Solar.R
## 4 2 Ozone Solar.R
## 4 3 Ozone Solar.R
## 4 4 Ozone Solar.R
## 4 5 Ozone Solar.R
## 5 1 Ozone Solar.R
## 5 2 Ozone Solar.R
## 5 3 Ozone Solar.R
## 5 4 Ozone Solar.R
## 5 5 Ozone Solar.R
## 6 1 Ozone Solar.R
## 6 2 Ozone Solar.R
## 6 3 Ozone Solar.R
## 6 4 Ozone Solar.R
## 6 5 Ozone Solar.R
## 7 1 Ozone Solar.R
## 7 2 Ozone Solar.R
## 7 3 Ozone Solar.R
## 7 4 Ozone Solar.R
## 7 5 Ozone Solar.R
## 8 1 Ozone Solar.R
## 8 2 Ozone Solar.R
## 8 3 Ozone Solar.R
## 8 4 Ozone Solar.R
## 8 5 Ozone Solar.R
## 9 1 Ozone Solar.R
## 9 2 Ozone Solar.R
## 9 3 Ozone Solar.R
## 9 4 Ozone Solar.R
## 9 5 Ozone Solar.R
## 10 1 Ozone Solar.R
## 10 2 Ozone Solar.R
## 10 3 Ozone Solar.R
## 10 4 Ozone Solar.R
## 10 5 Ozone Solar.R
## 11 1 Ozone Solar.R
## 11 2 Ozone Solar.R
## 11 3 Ozone Solar.R
## 11 4 Ozone Solar.R
## 11 5 Ozone Solar.R
## 12 1 Ozone Solar.R
## 12 2 Ozone Solar.R
## 12 3 Ozone Solar.R
## 12 4 Ozone Solar.R
## 12 5 Ozone Solar.R
## 13 1 Ozone Solar.R
## 13 2 Ozone Solar.R
## 13 3 Ozone Solar.R
## 13 4 Ozone Solar.R
## 13 5 Ozone Solar.R
## 14 1 Ozone Solar.R
## 14 2 Ozone Solar.R
## 14 3 Ozone Solar.R
## 14 4 Ozone Solar.R
## 14 5 Ozone Solar.R
## 15 1 Ozone Solar.R
## 15 2 Ozone Solar.R
## 15 3 Ozone Solar.R
## 15 4 Ozone Solar.R
## 15 5 Ozone Solar.R
## 16 1 Ozone Solar.R
## 16 2 Ozone Solar.R
## 16 3 Ozone Solar.R
## 16 4 Ozone Solar.R
## 16 5 Ozone Solar.R
## 17 1 Ozone Solar.R
## 17 2 Ozone Solar.R
## 17 3 Ozone Solar.R
## 17 4 Ozone Solar.R
## 17 5 Ozone Solar.R
## 18 1 Ozone Solar.R
## 18 2 Ozone Solar.R
## 18 3 Ozone Solar.R
## 18 4 Ozone Solar.R
## 18 5 Ozone Solar.R
## 19 1 Ozone Solar.R
## 19 2 Ozone Solar.R
## 19 3 Ozone Solar.R
## 19 4 Ozone Solar.R
## 19 5 Ozone Solar.R
## 20 1 Ozone Solar.R
## 20 2 Ozone Solar.R
## 20 3 Ozone Solar.R
## 20 4 Ozone Solar.R
## 20 5 Ozone Solar.R
## 21 1 Ozone Solar.R
## 21 2 Ozone Solar.R
## 21 3 Ozone Solar.R
## 21 4 Ozone Solar.R
## 21 5 Ozone Solar.R
## 22 1 Ozone Solar.R
## 22 2 Ozone Solar.R
## 22 3 Ozone Solar.R
## 22 4 Ozone Solar.R
## 22 5 Ozone Solar.R
## 23 1 Ozone Solar.R
## 23 2 Ozone Solar.R
## 23 3 Ozone Solar.R
## 23 4 Ozone Solar.R
## 23 5 Ozone Solar.R
## 24 1 Ozone Solar.R
## 24 2 Ozone Solar.R
## 24 3 Ozone Solar.R
## 24 4 Ozone Solar.R
## 24 5 Ozone Solar.R
## 25 1 Ozone Solar.R
## 25 2 Ozone Solar.R
## 25 3 Ozone Solar.R
## 25 4 Ozone Solar.R
## 25 5 Ozone Solar.R
## 26 1 Ozone Solar.R
## 26 2 Ozone Solar.R
## 26 3 Ozone Solar.R
## 26 4 Ozone Solar.R
## 26 5 Ozone Solar.R
## 27 1 Ozone Solar.R
## 27 2 Ozone Solar.R
## 27 3 Ozone Solar.R
## 27 4 Ozone Solar.R
## 27 5 Ozone Solar.R
## 28 1 Ozone Solar.R
## 28 2 Ozone Solar.R
## 28 3 Ozone Solar.R
## 28 4 Ozone Solar.R
## 28 5 Ozone Solar.R
## 29 1 Ozone Solar.R
## 29 2 Ozone Solar.R
## 29 3 Ozone Solar.R
## 29 4 Ozone Solar.R
## 29 5 Ozone Solar.R
## 30 1 Ozone Solar.R
## 30 2 Ozone Solar.R
## 30 3 Ozone Solar.R
## 30 4 Ozone Solar.R
## 30 5 Ozone Solar.R
## 31 1 Ozone Solar.R
## 31 2 Ozone Solar.R
## 31 3 Ozone Solar.R
## 31 4 Ozone Solar.R
## 31 5 Ozone Solar.R
## 32 1 Ozone Solar.R
## 32 2 Ozone Solar.R
## 32 3 Ozone Solar.R
## 32 4 Ozone Solar.R
## 32 5 Ozone Solar.R
## 33 1 Ozone Solar.R
## 33 2 Ozone Solar.R
## 33 3 Ozone Solar.R
## 33 4 Ozone Solar.R
## 33 5 Ozone Solar.R
## 34 1 Ozone Solar.R
## 34 2 Ozone Solar.R
## 34 3 Ozone Solar.R
## 34 4 Ozone Solar.R
## 34 5 Ozone Solar.R
## 35 1 Ozone Solar.R
## 35 2 Ozone Solar.R
## 35 3 Ozone Solar.R
## 35 4 Ozone Solar.R
## 35 5 Ozone Solar.R
## 36 1 Ozone Solar.R
## 36 2 Ozone Solar.R
## 36 3 Ozone Solar.R
## 36 4 Ozone Solar.R
## 36 5 Ozone Solar.R
## 37 1 Ozone Solar.R
## 37 2 Ozone Solar.R
## 37 3 Ozone Solar.R
## 37 4 Ozone Solar.R
## 37 5 Ozone Solar.R
## 38 1 Ozone Solar.R
## 38 2 Ozone Solar.R
## 38 3 Ozone Solar.R
## 38 4 Ozone Solar.R
## 38 5 Ozone Solar.R
## 39 1 Ozone Solar.R
## 39 2 Ozone Solar.R
## 39 3 Ozone Solar.R
## 39 4 Ozone Solar.R
## 39 5 Ozone Solar.R
## 40 1 Ozone Solar.R
## 40 2 Ozone Solar.R
## 40 3 Ozone Solar.R
## 40 4 Ozone Solar.R
## 40 5 Ozone Solar.R
## 41 1 Ozone Solar.R
## 41 2 Ozone Solar.R
## 41 3 Ozone Solar.R
## 41 4 Ozone Solar.R
## 41 5 Ozone Solar.R
## 42 1 Ozone Solar.R
## 42 2 Ozone Solar.R
## 42 3 Ozone Solar.R
## 42 4 Ozone Solar.R
## 42 5 Ozone Solar.R
## 43 1 Ozone Solar.R
## 43 2 Ozone Solar.R
## 43 3 Ozone Solar.R
## 43 4 Ozone Solar.R
## 43 5 Ozone Solar.R
## 44 1 Ozone Solar.R
## 44 2 Ozone Solar.R
## 44 3 Ozone Solar.R
## 44 4 Ozone Solar.R
## 44 5 Ozone Solar.R
## 45 1 Ozone Solar.R
## 45 2 Ozone Solar.R
## 45 3 Ozone Solar.R
## 45 4 Ozone Solar.R
## 45 5 Ozone Solar.R
## 46 1 Ozone Solar.R
## 46 2 Ozone Solar.R
## 46 3 Ozone Solar.R
## 46 4 Ozone Solar.R
## 46 5 Ozone Solar.R
## 47 1 Ozone Solar.R
## 47 2 Ozone Solar.R
## 47 3 Ozone Solar.R
## 47 4 Ozone Solar.R
## 47 5 Ozone Solar.R
## 48 1 Ozone Solar.R
## 48 2 Ozone Solar.R
## 48 3 Ozone Solar.R
## 48 4 Ozone Solar.R
## 48 5 Ozone Solar.R
## 49 1 Ozone Solar.R
## 49 2 Ozone Solar.R
## 49 3 Ozone Solar.R
## 49 4 Ozone Solar.R
## 49 5 Ozone Solar.R
## 50 1 Ozone Solar.R
## 50 2 Ozone Solar.R
## 50 3 Ozone Solar.R
## 50 4 Ozone Solar.R
## 50 5 Ozone Solar.R
# `df_im` is a list-like imputation object rather than a data frame, so head()
# returns its first six components ($call, $data, $m, $nmis, $imp, $method)
# and each of them is printed in full.
head(df_im)
## $call
## mice(data = df, m = 5, method = "pmm", maxit = 50, seed = 500)
##
## $data
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## 11 7 NA 6.9 74 5 11
## 12 16 256 9.7 69 5 12
## 13 11 290 9.2 66 5 13
## 14 14 274 10.9 68 5 14
## 15 18 65 13.2 58 5 15
## 16 14 334 11.5 64 5 16
## 17 34 307 12.0 66 5 17
## 18 6 78 18.4 57 5 18
## 19 30 322 11.5 68 5 19
## 20 11 44 9.7 62 5 20
## 21 1 8 9.7 59 5 21
## 22 11 320 16.6 73 5 22
## 23 4 25 9.7 61 5 23
## 24 32 92 12.0 61 5 24
## 25 NA 66 16.6 57 5 25
## 26 NA 266 14.9 58 5 26
## 27 NA NA 8.0 57 5 27
## 28 23 13 12.0 67 5 28
## 29 45 252 14.9 81 5 29
## 30 115 223 5.7 79 5 30
## 31 37 279 7.4 76 5 31
## 32 NA 286 8.6 78 6 1
## 33 NA 287 9.7 74 6 2
## 34 NA 242 16.1 67 6 3
## 35 NA 186 9.2 84 6 4
## 36 NA 220 8.6 85 6 5
## 37 NA 264 14.3 79 6 6
## 38 29 127 9.7 82 6 7
## 39 NA 273 6.9 87 6 8
## 40 71 291 13.8 90 6 9
## 41 39 323 11.5 87 6 10
## 42 NA 259 10.9 93 6 11
## 43 NA 250 9.2 92 6 12
## 44 23 148 8.0 82 6 13
## 45 NA 332 13.8 80 6 14
## 46 NA 322 11.5 79 6 15
## 47 21 191 14.9 77 6 16
## 48 37 284 20.7 72 6 17
## 49 20 37 9.2 65 6 18
## 50 12 120 11.5 73 6 19
## 51 13 137 10.3 76 6 20
## 52 NA 150 6.3 77 6 21
## 53 NA 59 1.7 76 6 22
## 54 NA 91 4.6 76 6 23
## 55 NA 250 6.3 76 6 24
## 56 NA 135 8.0 75 6 25
## 57 NA 127 8.0 78 6 26
## 58 NA 47 10.3 73 6 27
## 59 NA 98 11.5 80 6 28
## 60 NA 31 14.9 77 6 29
## 61 NA 138 8.0 83 6 30
## 62 135 269 4.1 84 7 1
## 63 49 248 9.2 85 7 2
## 64 32 236 9.2 81 7 3
## 65 NA 101 10.9 84 7 4
## 66 64 175 4.6 83 7 5
## 67 40 314 10.9 83 7 6
## 68 77 276 5.1 88 7 7
## 69 97 267 6.3 92 7 8
## 70 97 272 5.7 92 7 9
## 71 85 175 7.4 89 7 10
## 72 NA 139 8.6 82 7 11
## 73 10 264 14.3 73 7 12
## 74 27 175 14.9 81 7 13
## 75 NA 291 14.9 91 7 14
## 76 7 48 14.3 80 7 15
## 77 48 260 6.9 81 7 16
## 78 35 274 10.3 82 7 17
## 79 61 285 6.3 84 7 18
## 80 79 187 5.1 87 7 19
## 81 63 220 11.5 85 7 20
## 82 16 7 6.9 74 7 21
## 83 NA 258 9.7 81 7 22
## 84 NA 295 11.5 82 7 23
## 85 80 294 8.6 86 7 24
## 86 108 223 8.0 85 7 25
## 87 20 81 8.6 82 7 26
## 88 52 82 12.0 86 7 27
## 89 82 213 7.4 88 7 28
## 90 50 275 7.4 86 7 29
## 91 64 253 7.4 83 7 30
## 92 59 254 9.2 81 7 31
## 93 39 83 6.9 81 8 1
## 94 9 24 13.8 81 8 2
## 95 16 77 7.4 82 8 3
## 96 78 NA 6.9 86 8 4
## 97 35 NA 7.4 85 8 5
## 98 66 NA 4.6 87 8 6
## 99 122 255 4.0 89 8 7
## 100 89 229 10.3 90 8 8
## 101 110 207 8.0 90 8 9
## 102 NA 222 8.6 92 8 10
## 103 NA 137 11.5 86 8 11
## 104 44 192 11.5 86 8 12
## 105 28 273 11.5 82 8 13
## 106 65 157 9.7 80 8 14
## 107 NA 64 11.5 79 8 15
## 108 22 71 10.3 77 8 16
## 109 59 51 6.3 79 8 17
## 110 23 115 7.4 76 8 18
## 111 31 244 10.9 78 8 19
## 112 44 190 10.3 78 8 20
## 113 21 259 15.5 77 8 21
## 114 9 36 14.3 72 8 22
## 115 NA 255 12.6 75 8 23
## 116 45 212 9.7 79 8 24
## 117 168 238 3.4 81 8 25
## 118 73 215 8.0 86 8 26
## 119 NA 153 5.7 88 8 27
## 120 76 203 9.7 97 8 28
## 121 118 225 2.3 94 8 29
## 122 84 237 6.3 96 8 30
## 123 85 188 6.3 94 8 31
## 124 96 167 6.9 91 9 1
## 125 78 197 5.1 92 9 2
## 126 73 183 2.8 93 9 3
## 127 91 189 4.6 93 9 4
## 128 47 95 7.4 87 9 5
## 129 32 92 15.5 84 9 6
## 130 20 252 10.9 80 9 7
## 131 23 220 10.3 78 9 8
## 132 21 230 10.9 75 9 9
## 133 24 259 9.7 73 9 10
## 134 44 236 14.9 81 9 11
## 135 21 259 15.5 76 9 12
## 136 28 238 6.3 77 9 13
## 137 9 24 10.9 71 9 14
## 138 13 112 11.5 71 9 15
## 139 46 237 6.9 78 9 16
## 140 18 224 13.8 67 9 17
## 141 13 27 10.3 76 9 18
## 142 24 238 10.3 68 9 19
## 143 16 201 8.0 82 9 20
## 144 13 238 12.6 64 9 21
## 145 23 14 9.2 71 9 22
## 146 36 139 10.3 81 9 23
## 147 7 49 10.3 69 9 24
## 148 14 20 16.6 63 9 25
## 149 30 193 6.9 70 9 26
## 150 NA 145 13.2 77 9 27
## 151 14 191 14.3 75 9 28
## 152 18 131 8.0 76 9 29
## 153 20 223 11.5 68 9 30
##
## $m
## [1] 5
##
## $nmis
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
##
## $imp
## $imp$Ozone
## 1 2 3 4 5
## 5 6 1 14 18 6
## 10 12 23 30 21 23
## 25 8 19 6 14 19
## 26 32 32 28 19 19
## 27 13 12 37 18 7
## 32 59 20 59 36 78
## 33 16 31 23 11 18
## 34 1 13 13 37 13
## 35 44 37 7 35 71
## 36 35 108 59 35 39
## 37 14 16 30 30 20
## 39 115 91 115 168 82
## 42 64 77 168 66 78
## 43 61 91 79 82 91
## 45 23 29 23 44 44
## 46 45 63 39 45 20
## 52 45 71 20 52 35
## 53 20 80 48 23 49
## 54 7 37 35 52 20
## 55 35 39 49 20 23
## 56 13 40 39 36 31
## 57 36 49 40 46 46
## 58 9 44 41 23 21
## 59 16 32 23 39 36
## 60 23 41 32 44 32
## 61 40 85 48 39 39
## 65 23 16 65 28 59
## 72 59 20 29 52 63
## 75 40 47 47 59 108
## 83 32 23 35 44 59
## 84 28 16 40 29 59
## 102 115 85 61 168 91
## 103 65 39 16 32 16
## 107 12 14 16 22 23
## 115 24 12 16 21 36
## 119 78 78 50 61 78
## 150 12 10 14 21 23
##
## $imp$Solar.R
## 1 2 3 4 5
## 5 135 259 201 238 47
## 6 285 225 120 322 307
## 11 275 255 255 138 66
## 27 238 20 230 238 223
## 96 225 267 175 187 272
## 97 83 236 254 258 139
## 98 314 203 225 92 167
##
## $imp$Wind
## NULL
##
## $imp$Temp
## NULL
##
## $imp$Month
## NULL
##
## $imp$Day
## NULL
##
##
## $method
## Ozone Solar.R Wind Temp Month Day
## "pmm" "pmm" "pmm" "pmm" "pmm" "pmm"
# Get data back to the original format
# Extract the first of the imputed data sets as an ordinary data frame, then
# preview it; the previously missing cells now hold imputed values.
df1 <- complete(df_im, action = 1)
head(df1, n = 6L)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 6 135 14.3 56 5 5
## 6 28 285 14.9 66 5 6
Another way of filling in missing data: replacing each NA with the mean of the observed values
# Mean imputation by direct assignment: work on a copy of the data and
# overwrite the missing Ozone readings with the mean of the observed ones.
# Only Ozone is filled; other columns keep their NAs.
df2 <- df
# A logical mask indexes the NA positions directly (no which() needed), and
# na.rm is spelled TRUE because `T` is an ordinary variable that can be rebound.
df2$Ozone[is.na(df2$Ozone)] <- mean(df2$Ozone, na.rm = TRUE)
head(df2)
## Ozone Solar.R Wind Temp Month Day
## 1 41.00000 190 7.4 67 5 1
## 2 36.00000 118 8.0 72 5 2
## 3 12.00000 149 12.6 74 5 3
## 4 18.00000 313 11.5 62 5 4
## 5 42.12931 NA 14.3 56 5 5
## 6 28.00000 NA 14.9 66 5 6
# Mean imputation with ifelse(): same result as assigning into the NA
# positions, expressed as a vectorised choice per element.
df3 <- df
# Mean of the observed Ozone values; TRUE is spelled out because `T` is an
# ordinary variable that can be rebound.
mean1 <- mean(df3$Ozone, na.rm = TRUE)
# is.na() already yields a logical vector, so it is used as the test directly.
df3$Ozone <- ifelse(is.na(df3$Ozone), mean1, df3$Ozone)
head(df3)
## Ozone Solar.R Wind Temp Month Day
## 1 41.00000 190 7.4 67 5 1
## 2 36.00000 118 8.0 72 5 2
## 3 12.00000 149 12.6 74 5 3
## 4 18.00000 313 11.5 62 5 4
## 5 42.12931 NA 14.3 56 5 5
## 6 28.00000 NA 14.9 66 5 6