# Identify and remove rows that hold the same set of values, irrespective of
# the column in which each value appears.

#' Sort the values within each row of a data frame
#'
#' Every column is converted to character and the values of each row are then
#' sorted, so two rows that contain the same values in different columns end
#' up identical and can be compared with `duplicated()`.
#'
#' @param data A data frame with atomic (or factor) columns.
#' @return An unnamed character matrix with `nrow(data)` rows and
#'   `ncol(data)` columns; row `i` holds the sorted values of `data[i, ]`,
#'   with any `NA` placed last.
sort_row_values <- function(data) {
  n_rows <- nrow(data)
  n_cols <- ncol(data)
  # Convert column by column with as.character() instead of relying on
  # apply()/as.matrix(): for a mixed-type data frame as.matrix() runs numeric
  # columns through format(), which pads them to a per-column width
  # (e.g. "  2" vs "2") and makes equal values in different columns differ.
  values <- matrix(
    as.character(unlist(lapply(data, as.character), use.names = FALSE)),
    nrow = n_rows,
    ncol = n_cols
  )
  for (i in seq_len(n_rows)) {
    # na.last = TRUE keeps missing values; plain sort() drops them, which
    # would change the row length.
    values[i, ] <- sort(values[i, ], na.last = TRUE)
  }
  values
}

# Create an example data frame
df <- data.frame(
  A = c(1, 2, 3, 4, 5),
  B = c("a", "b", "c", "d", "e"),
  C = c(10, 20, 30, 40, 50)
)
# Add a row with the same values as row 2, but with A and B swapped.
# Note: the new row is a character vector, so rbind() turns every column of
# `df` into character.
df <- rbind(df, c("b", "2", "20"))
# Display the original data frame
print("Original Data Frame:")
## [1] "Original Data Frame:"
print(df)
##   A B  C
## 1 1 a 10
## 2 2 b 20
## 3 3 c 30
## 4 4 d 40
## 5 5 e 50
## 6 b 2 20
# Sort the values in each row so that column order no longer matters
df_sorted <- sort_row_values(df)
df_sorted
##      [,1] [,2] [,3]
## [1,] "1"  "10" "a"
## [2,] "2"  "20" "b"
## [3,] "3"  "30" "c"
## [4,] "4"  "40" "d"
## [5,] "5"  "50" "e"
## [6,] "2"  "20" "b"
# Find duplicated rows: TRUE marks a row whose sorted values already occurred
# in an earlier row
duplicate_rows <- duplicated(df_sorted)
duplicate_rows
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE
# Keep only the first occurrence of each set of values. Despite its name,
# `duplicate_rows_df` holds the de-duplicated rows, not the duplicates.
duplicate_rows_df <- df[!duplicate_rows, ]
duplicate_rows_df
##   A B  C
## 1 1 a 10
## 2 2 b 20
## 3 3 c 30
## 4 4 d 40
## 5 5 e 50
# The original data frame is left untouched
df
##   A B  C
## 1 1 a 10
## 2 2 b 20
## 3 3 c 30
## 4 4 d 40
## 5 5 e 50
## 6 b 2 20