Data
#Build the example table column by column; rows 13, 15 and 16 repeat
#rows 8, 10 and 11 so the de-duplication calls have something to remove.
ID <- c(
  16762, 16439, 16211, 16790, 16443, 16998, 16543, 16779, 16945,
  16111, 16224, 16980, 16779, 16000, 16111, 16224, 16400, 16327
)
Name <- c(
  "Ahmed", "Osama", "Ibraheem", "Fahd", "Majeda", "Hdeel",
  "Mohammed", "Remas", "Rteel", "Abdalrhman", "Mhdi", "Tala",
  "Remas", "Nadiah", "Abdalrhman", "Mhdi", "Lila", "Fatima"
)
Age <- c(
  30, 32, 29, 7, 27, 9, 32, 9, 10,
  29, 28, 9, 9, 30, 29, 28, 42, 33
)
Sex <- c(
  "M", "M", "M", "M", "F", "F", "M", "F", "F",
  "M", "M", "F", "F", "F", "M", "M", "F", "F"
)
#Combine the four equal-length vectors into one data frame and print it.
data <- data.frame(ID = ID, Name = Name, Age = Age, Sex = Sex)
data
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 9 16945 Rteel 10 F
## 10 16111 Abdalrhman 29 M
## 11 16224 Mhdi 28 M
## 12 16980 Tala 9 F
## 13 16779 Remas 9 F
## 14 16000 Nadiah 30 F
## 15 16111 Abdalrhman 29 M
## 16 16224 Mhdi 28 M
## 17 16400 Lila 42 F
## 18 16327 Fatima 33 F
1) Using the distinct() function from dplyr
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Drop rows that are exact copies of an earlier row (all columns compared)
data %>% distinct()
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 9 16945 Rteel 10 F
## 10 16111 Abdalrhman 29 M
## 11 16224 Mhdi 28 M
## 12 16980 Tala 9 F
## 13 16000 Nadiah 30 F
## 14 16400 Lila 42 F
## 15 16327 Fatima 33 F
#Keep the first row for each distinct Sex value, retaining every column
data %>% distinct(Sex, .keep_all = TRUE)
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16443 Majeda 27 F
#Keep the first row for each distinct Sex/Age combination, retaining every column
data %>% distinct(Sex, Age, .keep_all = TRUE)
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 7 16945 Rteel 10 F
## 8 16224 Mhdi 28 M
## 9 16000 Nadiah 30 F
## 10 16400 Lila 42 F
## 11 16327 Fatima 33 F
2) Using the base R duplicated() function
#Row positions that repeat an earlier row (first occurrences are not flagged)
which(duplicated(data))
## [1] 13 15 16
#Keep only the rows that are not flagged as repeats of an earlier row
data[!duplicated(data), , drop = FALSE]
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 9 16945 Rteel 10 F
## 10 16111 Abdalrhman 29 M
## 11 16224 Mhdi 28 M
## 12 16980 Tala 9 F
## 14 16000 Nadiah 30 F
## 17 16400 Lila 42 F
## 18 16327 Fatima 33 F
#Keep the first row seen for each Age value; later rows repeating an Age are dropped
data[!duplicated(data[["Age"]]), ]
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 9 16945 Rteel 10 F
## 11 16224 Mhdi 28 M
## 17 16400 Lila 42 F
## 18 16327 Fatima 33 F
3) Using the base R unique() function
#Drop rows that are exact copies of an earlier row; original row names are kept
unique(data)
## ID Name Age Sex
## 1 16762 Ahmed 30 M
## 2 16439 Osama 32 M
## 3 16211 Ibraheem 29 M
## 4 16790 Fahd 7 M
## 5 16443 Majeda 27 F
## 6 16998 Hdeel 9 F
## 7 16543 Mohammed 32 M
## 8 16779 Remas 9 F
## 9 16945 Rteel 10 F
## 10 16111 Abdalrhman 29 M
## 11 16224 Mhdi 28 M
## 12 16980 Tala 9 F
## 14 16000 Nadiah 30 F
## 17 16400 Lila 42 F
## 18 16327 Fatima 33 F
#Distinct Age values, listed in order of first appearance
unique(data[["Age"]])
## [1] 30 32 29 7 27 9 10 28 42 33