Ordering Data Frames

Sean Wen

31st January 2019

Prologue

  1. I would like to have a peek at the ‘top hits’ in my data frame. Normally, this would be when I am probing or exploring my dataset from different angles.
  2. I intend to write/export the data frame to a file (e.g. .txt/.csv) for a friend, ordered by my friend’s variable of interest.

Getting started

# Fix the RNG seed so the randomly sampled example data are reproducible
set.seed(123)

# Example columns for 10 people: a randomly drawn city and cuisine, plus an
# age drawn uniformly from [20, 60] and rounded to a whole number.
# NOTE: the cuisine column is spelled "Cruisine" throughout this document; the
# name is kept as-is so that later code referring to df$Cruisine still works.
Name <- c(
  "Cathy", "Vincent", "Nicholas", "Drake", "Karmen",
  "Robert", "Watson", "Ginger", "Matt", "Merton"
)
City <- sample(c("Aberdeen", "Southampton", "London"), size = 10, replace = TRUE)
Cruisine <- sample(c("Arab", "Lebanese", "European"), size = 10, replace = TRUE)
# `digits` is spelled out in full instead of relying on partial argument
# matching of `digit`
Age <- round(runif(n = 10, min = 20, max = 60), digits = 0)

# Assemble the data frame, keeping text columns as character (not factor)
df <- data.frame(Name, City, Cruisine, Age, stringsAsFactors = FALSE)
print(df)
##        Name        City Cruisine Age
## 1     Cathy    Aberdeen European  56
## 2   Vincent      London Lebanese  48
## 3  Nicholas Southampton European  46
## 4     Drake      London Lebanese  60
## 5    Karmen      London     Arab  46
## 6    Robert    Aberdeen European  48
## 7    Watson Southampton     Arab  42
## 8    Ginger      London     Arab  44
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26

Ordering based on 1 column

# Categorical key: rearrange the rows so that City is in ascending
# (alphabetical) order
df.ordered <- df[order(df[["City"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 1     Cathy    Aberdeen European  56
## 6    Robert    Aberdeen European  48
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 3  Nicholas Southampton European  46
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26
# Continuous key: rearrange the rows so that Age is in ascending order
df.ordered <- df[order(df[["Age"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 10   Merton Southampton European  26
## 9      Matt Southampton     Arab  32
## 7    Watson Southampton     Arab  42
## 8    Ginger      London     Arab  44
## 3  Nicholas Southampton European  46
## 5    Karmen      London     Arab  46
## 2   Vincent      London Lebanese  48
## 6    Robert    Aberdeen European  48
## 1     Cathy    Aberdeen European  56
## 4     Drake      London Lebanese  60

Ordering based on >1 column

# Primary key City, with ties broken by Age (both ascending)
df.ordered <- df[order(df[["City"]], df[["Age"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 6    Robert    Aberdeen European  48
## 1     Cathy    Aberdeen European  56
## 8    Ginger      London     Arab  44
## 5    Karmen      London     Arab  46
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
## 10   Merton Southampton European  26
## 9      Matt Southampton     Arab  32
## 7    Watson Southampton     Arab  42
## 3  Nicholas Southampton European  46
# Primary key Cruisine, with ties broken by Age (both ascending)
df.ordered <- df[order(df[["Cruisine"]], df[["Age"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 9      Matt Southampton     Arab  32
## 7    Watson Southampton     Arab  42
## 8    Ginger      London     Arab  44
## 5    Karmen      London     Arab  46
## 10   Merton Southampton European  26
## 3  Nicholas Southampton European  46
## 6    Robert    Aberdeen European  48
## 1     Cathy    Aberdeen European  56
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
# Two categorical keys: City first, then Cruisine within each city
df.ordered <- df[order(df[["City"]], df[["Cruisine"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 1     Cathy    Aberdeen European  56
## 6    Robert    Aberdeen European  48
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 3  Nicholas Southampton European  46
## 10   Merton Southampton European  26

Ordering in reverse order

# Categorical key, reversed: City from Z to A via the `decreasing` argument
df.ordered <- df[order(df[["City"]], decreasing = TRUE), ]
print(df.ordered)
##        Name        City Cruisine Age
## 3  Nicholas Southampton European  46
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 1     Cathy    Aberdeen European  56
## 6    Robert    Aberdeen European  48
# Continuous key, reversed: Age from largest to smallest via `decreasing`
df.ordered <- df[order(df[["Age"]], decreasing = TRUE), ]
print(df.ordered)
##        Name        City Cruisine Age
## 4     Drake      London Lebanese  60
## 1     Cathy    Aberdeen European  56
## 2   Vincent      London Lebanese  48
## 6    Robert    Aberdeen European  48
## 3  Nicholas Southampton European  46
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26

Ordering in reverse order using the minus sign

# Order rows by a categorical variable using the unary minus.
# NOTE: City is a character vector here and `-` cannot be applied to it, so
# this call fails with the error shown below and df.ordered is NOT reassigned.
df.ordered <- df[order(-df$City), ]
## Error in -df$City: invalid argument to unary operator
# df.ordered still holds the result of the last successful assignment, so the
# table printed here is the earlier ordering, not one based on City.
print(df.ordered)
##        Name        City Cruisine Age
## 4     Drake      London Lebanese  60
## 1     Cathy    Aberdeen European  56
## 2   Vincent      London Lebanese  48
## 6    Robert    Aberdeen European  48
## 3  Nicholas Southampton European  46
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26
# Continuous key, reversed: negating the numeric Age column makes the
# ascending order() return rows from oldest to youngest
df.ordered <- df[order(-df[["Age"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 4     Drake      London Lebanese  60
## 1     Cathy    Aberdeen European  56
## 2   Vincent      London Lebanese  48
## 6    Robert    Aberdeen European  48
## 3  Nicholas Southampton European  46
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26

Customising order of elements

# Inspect the current class of the City column (is it already a factor?)
class(df[["City"]])
## [1] "character"
# Turn the character City column into a factor, then confirm the new class
df[["City"]] <- as.factor(df[["City"]])
class(df[["City"]])
## [1] "factor"
# Re-declare the factor with an explicit level order; order() then follows
# the level order (London, Southampton, Aberdeen) rather than the alphabet
df[["City"]] <- factor(
  df[["City"]],
  levels = c("London", "Southampton", "Aberdeen")
)
df.ordered <- df[order(df[["City"]]), ]
print(df.ordered)
##        Name        City Cruisine Age
## 2   Vincent      London Lebanese  48
## 4     Drake      London Lebanese  60
## 5    Karmen      London     Arab  46
## 8    Ginger      London     Arab  44
## 3  Nicholas Southampton European  46
## 7    Watson Southampton     Arab  42
## 9      Matt Southampton     Arab  32
## 10   Merton Southampton European  26
## 1     Cathy    Aberdeen European  56
## 6    Robert    Aberdeen European  48

Ordering vs. sorting

# order() gives the row positions (a permutation of indices), not the values
order(df[["City"]])
##  [1]  2  4  5  8  3  7  9 10  1  6
# sort() gives back the sorted values themselves rather than their positions
sort(df[["City"]])
##  [1] London      London      London      London      Southampton
##  [6] Southampton Southampton Southampton Aberdeen    Aberdeen   
## Levels: London Southampton Aberdeen

Summary