From Data Smart by John Foreman:
http://www.wiley.com/WileyCDA/WileyTitle/productCd-111866146X.html
Another very simple and easy data set: pregnancy durations in days, used here to find outliers with the interquartile range.
# Load the gestation-length sample from disk, then print summary statistics
# for each column of the resulting data frame.
PregnancyDuration <- read.csv(file = "data/PregnancyDuration.csv")
summary(object = PregnancyDuration)
## GestationDays
## Min. :240.0
## 1st Qu.:260.0
## Median :267.0
## Mean :266.6
## 3rd Qu.:272.0
## Max. :349.0
# The interquartile range (IQR) is the third quartile minus the first.
# Read both quartiles from the data rather than retyping the rounded values
# that summary() printed, so the result tracks the CSV if it ever changes.
# names = FALSE drops the "25%"/"75%" labels so the value prints unlabelled.
GestationQuartiles <- quantile(
  PregnancyDuration$GestationDays,
  probs = c(0.25, 0.75),
  names = FALSE
)
PregnancyDuration.IQR <- GestationQuartiles[2] - GestationQuartiles[1]
PregnancyDuration.IQR
## [1] 12
# or this way
PregnancyDuration.IQR <- IQR(PregnancyDuration$GestationDays)
PregnancyDuration.IQR
## [1] 12
# Inner fences sit 1.5 * IQR below the first quartile and 1.5 * IQR above the
# third quartile.
# The quartiles are computed from the data instead of being hard-coded from
# the summary() printout, so the fences stay correct if the CSV changes.
# names = FALSE keeps the fences unnamed, so they print as plain numbers.
FenceQuartiles <- quantile(
  PregnancyDuration$GestationDays,
  probs = c(0.25, 0.75),
  names = FALSE
)
LowerInnerFence <- FenceQuartiles[1] - 1.5 * PregnancyDuration.IQR
UpperInnerFence <- FenceQuartiles[2] + 1.5 * PregnancyDuration.IQR
LowerInnerFence
## [1] 242
UpperInnerFence
## [1] 290
# Row positions of the pregnancies that lasted longer than the upper inner
# fence.
UpperOutlierRows <- which(PregnancyDuration$GestationDays > UpperInnerFence)
UpperOutlierRows
## [1] 1 249 252 338 345 378 478 913
# The gestation lengths recorded at those positions.
PregnancyDuration$GestationDays[UpperOutlierRows]
## [1] 349 292 295 291 297 303 293 296
# Default box plot: whiskers reach the most extreme point that lies within
# 1.5 * IQR of the box; anything farther out is drawn as an individual point.
boxplot(x = PregnancyDuration$GestationDays)
# Same plot with the whisker limit widened to 3 * IQR, so only the more
# extreme points are still drawn individually.
boxplot(x = PregnancyDuration$GestationDays, range = 3)
You can pull the numbers behind the boxplot (whisker ends, hinges, and median) out of its return value this way.
# boxplot() draws the plot and also returns the numbers it used. Keep that
# list and print its stats component: lower whisker, lower hinge, median,
# upper hinge, upper whisker.
WideWhiskerBox <- boxplot(PregnancyDuration$GestationDays, range = 3)
WideWhiskerBox$stats
## [,1]
## [1,] 240
## [2,] 260
## [3,] 267
## [4,] 272
## [5,] 303
## attr(,"class")
## 1
## "integer"
Or you can get the outlier values (the points beyond the whiskers) like so.
# The out component of boxplot()'s return value holds every data point that
# lies beyond the whiskers (here, beyond 3 * IQR from the box).
boxplot(PregnancyDuration$GestationDays, range = 3)[["out"]]
## [1] 349