References
##################################################################################################
# Robust outlier detection
# Location estimated with the median
# Scale estimated with the normalised mean absolute deviation
##################################################################################################
rm(list=ls())
# Normalised Median Absolute Deviation
b <- 1/qnorm(0.75) # Normalisation constant
NMAD <- function(data) b*median( abs(data - median(data)))
# Data (guess which entries are outliers by eye)
data <- c(3, 5, 5, 7, 9, 11, 11, 1000)
# Number of NMADS from the Median
a <- 3
# NMAD and Median
NMAD.data <- NMAD(data)
med <- median(data)
c(med,NMAD.data)
## [1] 8.000000 4.447807
# Robust outlier detection
Rob.Out.detect <- Vectorize(function(x){
val <- abs((x - med)/NMAD.data)
return(ifelse(val>a, TRUE, FALSE))
} )
# Identifying outliers in the data with the robust method
cbind.data.frame(data,Rob.Out.detect(data))
## data Rob.Out.detect(data)
## 1 3 FALSE
## 2 5 FALSE
## 3 5 FALSE
## 4 7 FALSE
## 5 9 FALSE
## 6 11 FALSE
## 7 11 FALSE
## 8 1000 TRUE
##################################################################################################
# Non-Robust outlier detection
# Location estimated with the mean
# Scale estimated with the standard deviation
##################################################################################################
# Sample standard deviation
s.sigma <- function(data) sqrt( mean((data - mean(data))^2) )
# Number of SDs from the Mean
a <- 3
# Mean and standard deviation
mean.data <- mean(data)
sd.data <- s.sigma (data)
c(mean.data,sd.data)
## [1] 131.3750 328.3207
# Non-Robust outlier detection
Out.detect <- Vectorize(function(x){
val <- abs((x - mean.data)/sd.data)
return(ifelse(val>a, TRUE, FALSE))
} )
# Identifying outliers in the data with the non-robust method
cbind.data.frame(data,Out.detect(data))
## data Out.detect(data)
## 1 3 FALSE
## 2 5 FALSE
## 3 5 FALSE
## 4 7 FALSE
## 5 9 FALSE
## 6 11 FALSE
## 7 11 FALSE
## 8 1000 FALSE