Data
# Example data set: 18 records, one vector per column.
# Each vector below is laid out six values per row so the columns line up.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  "Ahmed",    "Osama",  "Ibraheem",   "Fahd", "Majeda", "Hdeel",
  "Mohammed", "Remas",  "Rteel",      "Abdalrhman", "Mhdi", "Tala",
  "Remas",    "Nadiah", "Abdalrhman", "Mhdi", "Lila",   "Fatima"
)
Age <- c(
  10,  2,  2, 27,  9,  32,
   9, 10, 29, 28,  9,   9,
  30, 29, 28,  6, 33, 100
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)

# Assemble the column vectors into a data frame and print it.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
data
## ID Name Age Sex
## 1 16762 Ahmed 10 M
## 2 16439 Osama 2 M
## 3 16211 Ibraheem 2 M
## 4 16790 Fahd 27 M
## 5 16443 Majeda 9 F
## 6 16998 Hdeel 32 F
## 7 16543 Mohammed 9 M
## 8 16779 Remas 10 F
## 9 16945 Rteel 29 F
## 10 16111 Abdalrhman 28 M
## 11 16224 Mhdi 9 M
## 12 16980 Tala 9 F
## 13 16779 Remas 30 F
## 14 16000 Nadiah 29 F
## 15 16111 Abdalrhman 28 M
## 16 16224 Mhdi 6 M
## 17 16400 Lila 33 F
## 18 16327 Fatima 100 F
1) Use a box plot
# Draw the box plot of Age and keep the values it flags as outliers
# (the `out` component of the list returned by boxplot()).
box_plot <- boxplot(data$Age)$out
# Annotate the plot with the flagged values.
mtext(paste("Outliers: ", paste(box_plot, collapse = ", ")))
# Identify rows containing outliers
out_ind <- which(data$Age %in% box_plot)
out_ind
## [1] 18
data[out_ind, ]
## ID Name Age Sex
## 18 16327 Fatima 100 F
# Remove outliers
# plot = FALSE computes the same statistics without drawing a second plot.
outliers <- boxplot(data$Age, plot = FALSE)$out
outliers
## [1] 100
# Use a logical mask instead of data[-which(...), ]: when nothing is flagged,
# which() returns integer(0) and a negative empty index selects NO rows,
# silently discarding the whole data set. The mask keeps every row then.
data[!(data$Age %in% outliers), ]
## ID Name Age Sex
## 1 16762 Ahmed 10 M
## 2 16439 Osama 2 M
## 3 16211 Ibraheem 2 M
## 4 16790 Fahd 27 M
## 5 16443 Majeda 9 F
## 6 16998 Hdeel 32 F
## 7 16543 Mohammed 9 M
## 8 16779 Remas 10 F
## 9 16945 Rteel 29 F
## 10 16111 Abdalrhman 28 M
## 11 16224 Mhdi 9 M
## 12 16980 Tala 9 F
## 13 16779 Remas 30 F
## 14 16000 Nadiah 29 F
## 15 16111 Abdalrhman 28 M
## 16 16224 Mhdi 6 M
## 17 16400 Lila 33 F
Data
# Rebuild the example data set (18 records) for the second method.
# Each vector below is laid out six values per row so the columns line up.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998,
  16543, 16779, 16945, 16111, 16224, 16980,
  16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  "Ahmed",    "Osama",  "Ibraheem",   "Fahd", "Majeda", "Hdeel",
  "Mohammed", "Remas",  "Rteel",      "Abdalrhman", "Mhdi", "Tala",
  "Remas",    "Nadiah", "Abdalrhman", "Mhdi", "Lila",   "Fatima"
)
Age <- c(
  10,  2,  2, 27,  9,  32,
   9, 10, 29, 28,  9,   9,
  30, 29, 28,  6, 33, 100
)
Sex <- c(
  "M", "M", "M", "M", "F", "F",
  "M", "F", "F", "M", "M", "F",
  "F", "F", "M", "M", "F", "F"
)

# Assemble the column vectors into a data frame and print it.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
data
## ID Name Age Sex
## 1 16762 Ahmed 10 M
## 2 16439 Osama 2 M
## 3 16211 Ibraheem 2 M
## 4 16790 Fahd 27 M
## 5 16443 Majeda 9 F
## 6 16998 Hdeel 32 F
## 7 16543 Mohammed 9 M
## 8 16779 Remas 10 F
## 9 16945 Rteel 29 F
## 10 16111 Abdalrhman 28 M
## 11 16224 Mhdi 9 M
## 12 16980 Tala 9 F
## 13 16779 Remas 30 F
## 14 16000 Nadiah 29 F
## 15 16111 Abdalrhman 28 M
## 16 16224 Mhdi 6 M
## 17 16400 Lila 33 F
## 18 16327 Fatima 100 F
2) Use the interquartile range (IQR)
Upper Range = Q3 + 1.5*IQR
Lower Range = Q1 - 1.5*IQR
Outliers = Observations > Q3 + 1.5*IQR or < Q1 - 1.5*IQR
# First and third quartiles of Age. quantile() returns a named value
# ("25%" / "75%"), which is why the printed output carries a label.
Q1 <- quantile(data$Age, 0.25)
Q1
## 25%
## 9
Q3 <- quantile(data$Age, 0.75)
Q3
## 75%
## 29
# Interquartile range of Age. The variable name is kept as `IQR`; calls such
# as IQR(x) still reach the stats function because R ignores non-function
# objects when resolving a function call.
IQR <- IQR(data$Age)
IQR
## [1] 20
# Upper Range Q3+1.5*IQR
# Derived from the computed quartile rather than a hard-coded 29, so the
# fence stays correct if the data change. unname() drops the "75%" label.
Up <- unname(Q3) + (1.5 * IQR)
# Lower Range Q1-1.5*IQR
# Derived from the computed quartile rather than a hard-coded 9.
Low <- unname(Q1) - (1.5 * IQR)
# Remove outliers
# Only values strictly beyond a fence are outliers, so rows lying exactly on
# a fence are kept (inclusive comparison).
subset(data, Age >= Low & Age <= Up)
## ID Name Age Sex
## 1 16762 Ahmed 10 M
## 2 16439 Osama 2 M
## 3 16211 Ibraheem 2 M
## 4 16790 Fahd 27 M
## 5 16443 Majeda 9 F
## 6 16998 Hdeel 32 F
## 7 16543 Mohammed 9 M
## 8 16779 Remas 10 F
## 9 16945 Rteel 29 F
## 10 16111 Abdalrhman 28 M
## 11 16224 Mhdi 9 M
## 12 16980 Tala 9 F
## 13 16779 Remas 30 F
## 14 16000 Nadiah 29 F
## 15 16111 Abdalrhman 28 M
## 16 16224 Mhdi 6 M
## 17 16400 Lila 33 F