# read the data from "MASS" package
library(MASS)
library(knitr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data(Animals, package="MASS")
knitr::kable(head(Animals))
body brain
Mountain beaver 1.35 8.1
Cow 465.00 423.0
Grey wolf 36.33 119.5
Goat 27.66 115.0
Guinea pig 1.04 5.5
Dipliodocus 11700.00 50.0
data("mammals", package="MASS")
knitr::kable(head(mammals))
body brain
Arctic fox 3.385 44.5
Owl monkey 0.480 15.5
Mountain beaver 1.350 8.1
Cow 465.000 423.0
Grey wolf 36.330 119.5
Goat 27.660 115.0
# Since the two datasets share the same columns, I combine the data by the rows
dta <- rbind(Animals,mammals)
str(dta)
## 'data.frame':    90 obs. of  2 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
# Locate the duplicated data
duplicated(dta)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
## [49]  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE
## [61]  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE
## [73]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE
# There are 25 duplicated observations.
dta1 <- dta[duplicated(dta), ] %>% str(dta)
## 'data.frame':    25 obs. of  2 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
# By removing the duplicated observations, we have 65 observations of 2 variables.
dta2 <- dta[!duplicated(dta), ] %>% str(dta)
## 'data.frame':    65 obs. of  2 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
# The result is the same as the funtion unique().
str(unique(dta))
## 'data.frame':    65 obs. of  2 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
# We can also do it by the function distinct() from [dplyr package]. It can be used to keep only unique/distinct rows from a data frame. If there are duplicate rows, only the first row is preserved. 
dta %>% dplyr::distinct(body,brain, .keep_all = TRUE)%>% str(dta)
## 'data.frame':    65 obs. of  2 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...