## Using the 'airquality' data from R's built-in datasets.
library(mice)
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Load the built-in airquality data set into the workspace.
data(airquality)

# View() opens a spreadsheet-style data viewer, which only makes sense in an
# interactive session; guard it so the script also runs non-interactively
# (e.g. under Rscript) instead of stopping at the first View() call.
if (interactive()) {
  View(airquality)
}

# Two ways of dropping every row that contains at least one NA.
# 1) na.omit() removes the incomplete rows.
q <- na.omit(airquality)
if (interactive()) {
  View(q)
}

# 2) complete.cases() flags the rows without any NA; use it as a row index.
q1 <- airquality[complete.cases(airquality), ]
if (interactive()) {
  View(q1)
}
## There are times when we can't omit the rows containing NAs, because the data might have a huge number of NAs and omitting them would remove most of the rows.
# Replacing NAs with 0 in column 'Ozone'.
# Replace the NAs in the 'Ozone' column only. The column itself is indexed:
# indexing whole rows (airquality[is.na(airquality$Ozone), ] <- 0) would set
# every variable in those rows to 0, including Month, Day, Wind and Temp.
airquality$Ozone[is.na(airquality$Ozone)] <- 0
# View() is only meaningful in an interactive session.
if (interactive()) {
  View(airquality)
}
## But there are times when we can't afford to replace NAs with 0, because this can affect the overall mean, and it is sometimes not advised for variables like age, where it can distort the column's data. It does shift the sample class.
library(mice)
# Tabulate the missing-data patterns. Ozone is now complete, so the only
# remaining missing values are the 7 NAs in Solar.R.
md.pattern(airquality)
## Ozone Wind Temp Month Day Solar.R
## 146 1 1 1 1 1 1 0
## 7 1 1 1 1 1 0 1
## 0 0 0 0 0 7 7
# md.pattern() is the simpler way; for a more controlled and neater view of the missing data, use library(VIM).
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Summarise and plot the missing values of airquality with VIM::aggr(); the
# returned summary object is kept in `mp`. The two colours are passed as
# `col`, the column names are used as axis labels, and the two panels are
# titled "Missing data" and "Pattern".
mp <- aggr(
  airquality,
  col = c("navyblue", "yellow"),
  numbers = TRUE,
  labels = names(airquality),
  cex.axis = 0.9,
  gap = 3,
  ylab = c("Missing data", "Pattern")
)