Load Libraries
#install.packages("xlsx")
#install.packages("corrgram")
Sys.setenv(JAVA_HOME='C:\\Program Files\\Java\\jre1.8.0_221')
library(xlsx)
library(ggplot2)
Read xlsx Sheet1
# read xlsx sheet Ex1
df1 <- read.xlsx('E:\\R-Files\\Outliers1.xlsx', sheetName="Sheet1", header=TRUE, colClasses=NA)
head(df1)
## data
## 1 514
## 2 220
## 3 810
## 4 721
## 5 991
## 6 438
Detect Outliers using IQR
# detect Outliers
outliers_iqr <- function(inp, na.rm=TRUE) {
i.qnt <- quantile(inp, probs=c(.25, .75), na.rm=na.rm)
i.max <- 1.5 * IQR(inp, na.rm=na.rm)
otp <- inp
otp[inp < (i.qnt[1] - i.max)] <- NA
otp[inp > (i.qnt[2] + i.max)] <- NA
inp[is.na(otp)]
}
outliers_iqr(df1$data)
## [1] 2019 1999 1971
Detect Outliers using STD_DEV
# detect Outliers
outliers_sd <- function(inp, na.rm=TRUE) {
lfence <- mean(inp) - (3 * sd(inp))
ufence <- mean(inp) + (3 * sd(inp))
otp <- inp
otp[inp < lfence] <- NA
otp[inp > ufence] <- NA
inp[is.na(otp)]
}
outliers_sd(df1$data)
## [1] 2019 1999
Read xlsx Sheet2
# read xlsx sheet Ex2
df2 <- read.xlsx('E:\\R-Files\\Outliers1.xlsx', sheetName="Sheet2", header=TRUE, colClasses=NA)
head(df2)
## data
## 1 3.98580730
## 2 0.06947271
## 3 5.93342246
## 4 4.28510776
## 5 3.34469264
## 6 5.61086322
Detect Outliers Using IQR
# detect Outliers
outliers_iqr <- function(inp, na.rm=TRUE) {
i.qnt <- quantile(inp, probs=c(.25, .75), na.rm=na.rm)
i.max <- 1.5 * IQR(inp, na.rm=na.rm)
otp <- inp
otp[inp < (i.qnt[1] - i.max)] <- NA
otp[inp > (i.qnt[2] + i.max)] <- NA
inp[is.na(otp)]
}
outliers_iqr(df2$data)
## [1] 15.93889 18.61065 17.03925
Detect Outliers Using STD_DEV
# detect Outliers
outliers_sd <- function(inp, na.rm=TRUE) {
lfence <- mean(inp) - (3 * sd(inp))
ufence <- mean(inp) + (3 * sd(inp))
otp <- inp
otp[inp < lfence] <- NA
otp[inp > ufence] <- NA
inp[is.na(otp)]
}
outliers_sd(df2$data)
## [1] 18.61065 17.03925