From Data Smart by John Foreman
http://www.wiley.com/WileyCDA/WileyTitle/productCd-111866146X.html

Outlier Detection

Another very simple and easy set!

# Outlier detection with Tukey fences on the pregnancy-duration data.
# Lines starting with `##` are the output recorded when this was run.
PregnancyDuration <- read.csv("data/PregnancyDuration.csv")
summary(PregnancyDuration)
##  GestationDays  
##  Min.   :240.0  
##  1st Qu.:260.0  
##  Median :267.0  
##  Mean   :266.6  
##  3rd Qu.:272.0  
##  Max.   :349.0
# Take the quartiles from the data instead of retyping them from the summary
# printout, so the IQR and fences stay correct if the CSV changes.
# names = FALSE drops the "25%"/"75%" labels so derived values print plainly.
GestationQuartiles <- quantile(
  PregnancyDuration$GestationDays,
  probs = c(0.25, 0.75),
  names = FALSE
)
PregnancyDuration.IQR <- GestationQuartiles[2] - GestationQuartiles[1]
PregnancyDuration.IQR
## [1] 12
# or this way
PregnancyDuration.IQR <- IQR(PregnancyDuration$GestationDays)
PregnancyDuration.IQR
## [1] 12
# Inner fences sit 1.5 * IQR outside the first and third quartiles.
LowerInnerFence <- GestationQuartiles[1] - 1.5 * PregnancyDuration.IQR
UpperInnerFence <- GestationQuartiles[2] + 1.5 * PregnancyDuration.IQR

LowerInnerFence
## [1] 242
UpperInnerFence
## [1] 290
# Row indices of durations beyond the upper fence. Only the upper side is
# listed here, although the minimum (240) also falls below the lower fence.
which(PregnancyDuration$GestationDays > UpperInnerFence)
## [1]   1 249 252 338 345 378 478 913
# The durations recorded at those rows.
PregnancyDuration$GestationDays[
  which(PregnancyDuration$GestationDays > UpperInnerFence)
]
## [1] 349 292 295 291 297 303 293 296
# Default boxplot: whiskers extend at most 1.5 * IQR beyond the box.
boxplot(PregnancyDuration$GestationDays)

# range = 3 lets the whiskers extend up to 3 * IQR beyond the box.
boxplot(PregnancyDuration$GestationDays, range = 3)

You can pull the summary statistics out of the boxplot this way.

# boxplot() draws the plot and also returns a list describing it. Keep that
# list and print its `stats` element; the five values recorded below run from
# the lower whisker end through the hinges and median to the upper whisker end.
WideWhiskerBoxplot <- boxplot(PregnancyDuration[["GestationDays"]], range = 3)
WideWhiskerBoxplot[["stats"]]

##      [,1]
## [1,]  240
## [2,]  260
## [3,]  267
## [4,]  272
## [5,]  303
## attr(,"class")
##         1 
## "integer"

Or you can get the outlier values like so.

# The `out` element of the list returned by boxplot() holds the points lying
# beyond the whiskers; with range = 3 only one value is reported below.
ExtremeOutlierBoxplot <- boxplot(PregnancyDuration[["GestationDays"]], range = 3)
ExtremeOutlierBoxplot[["out"]]

## [1] 349