# Author: Sean Wen
# Date: 31st January 2019
# Build a small example data frame describing ten people.
#
# The `##` output shown in this script matches the sampler that was the default
# before R 3.6.0. Newer R versions draw different values from sample() for the
# same seed, so the old "Rounding" sampler is selected explicitly (the call
# warns that this sampler is non-uniform, which is irrelevant for a demo).
if (getRversion() >= "3.6.0") {
  suppressWarnings(RNGkind(sample.kind = "Rounding"))
}
set.seed(123)
Name <- c(
  "Cathy", "Vincent", "Nicholas", "Drake", "Karmen",
  "Robert", "Watson", "Ginger", "Matt", "Merton"
)
City <- sample(c("Aberdeen", "Southampton", "London"), size = 10, replace = TRUE)
# NOTE: "Cruisine" (sic) is kept as spelled because the rest of the script
# refers to df$Cruisine.
Cruisine <- sample(c("Arab", "Lebanese", "European"), size = 10, replace = TRUE)
# Ages are drawn uniformly from [20, 60] and rounded to whole years; the
# argument is spelled `digits` in full rather than relying on partial matching.
Age <- round(runif(n = 10, min = 20, max = 60), digits = 0)
# stringsAsFactors = FALSE keeps City/Cruisine as character on R < 4.0 as well.
df <- data.frame(Name, City, Cruisine, Age, stringsAsFactors = FALSE)
print(df)
## Name City Cruisine Age
## 1 Cathy Aberdeen European 56
## 2 Vincent London Lebanese 48
## 3 Nicholas Southampton European 46
## 4 Drake London Lebanese 60
## 5 Karmen London Arab 46
## 6 Robert Aberdeen European 48
## 7 Watson Southampton Arab 42
## 8 Ginger London Arab 44
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
# Sort the rows alphabetically by the categorical (character) column City.
# order() returns row positions, which are then used to index the data frame.
df.ordered <- df[with(df, order(City)), ]
print(df.ordered)
## Name City Cruisine Age
## 1 Cathy Aberdeen European 56
## 6 Robert Aberdeen European 48
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 3 Nicholas Southampton European 46
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
# Sort the rows by the continuous (numeric) column Age, smallest value first.
df.ordered <- df[order(df[["Age"]]), ]
print(df.ordered)
## Name City Cruisine Age
## 10 Merton Southampton European 26
## 9 Matt Southampton Arab 32
## 7 Watson Southampton Arab 42
## 8 Ginger London Arab 44
## 3 Nicholas Southampton European 46
## 5 Karmen London Arab 46
## 2 Vincent London Lebanese 48
## 6 Robert Aberdeen European 48
## 1 Cathy Aberdeen European 56
## 4 Drake London Lebanese 60
# Sort by two keys: the first argument to order() is the primary key and the
# second one breaks ties among rows that share the same primary value.

# Primary key City, ties broken by Age
df.ordered <- df[with(df, order(City, Age)), ]
print(df.ordered)
## Name City Cruisine Age
## 6 Robert Aberdeen European 48
## 1 Cathy Aberdeen European 56
## 8 Ginger London Arab 44
## 5 Karmen London Arab 46
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60
## 10 Merton Southampton European 26
## 9 Matt Southampton Arab 32
## 7 Watson Southampton Arab 42
## 3 Nicholas Southampton European 46

# Primary key Cruisine, ties broken by Age
df.ordered <- df[with(df, order(Cruisine, Age)), ]
print(df.ordered)
## Name City Cruisine Age
## 9 Matt Southampton Arab 32
## 7 Watson Southampton Arab 42
## 8 Ginger London Arab 44
## 5 Karmen London Arab 46
## 10 Merton Southampton European 26
## 3 Nicholas Southampton European 46
## 6 Robert Aberdeen European 48
## 1 Cathy Aberdeen European 56
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60

# Primary key City, ties broken by Cruisine (both categorical)
df.ordered <- df[with(df, order(City, Cruisine)), ]
print(df.ordered)
## Name City Cruisine Age
## 1 Cathy Aberdeen European 56
## 6 Robert Aberdeen European 48
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 3 Nicholas Southampton European 46
## 10 Merton Southampton European 26
# Reverse-alphabetical sort on the categorical column City, requested through
# order()'s `decreasing` argument.
df.ordered <- df[order(df[["City"]], decreasing = TRUE), ]
print(df.ordered)
## Name City Cruisine Age
## 3 Nicholas Southampton European 46
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 1 Cathy Aberdeen European 56
## 6 Robert Aberdeen European 48
# Largest-to-smallest sort on the continuous column Age, again using the
# `decreasing` argument of order().
df.ordered <- df[with(df, order(Age, decreasing = TRUE)), ]
print(df.ordered)
## Name City Cruisine Age
## 4 Drake London Lebanese 60
## 1 Cathy Aberdeen European 56
## 2 Vincent London Lebanese 48
## 6 Robert Aberdeen European 48
## 3 Nicholas Southampton European 46
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
# Order rows by a categorical variable using the minus sign (this fails).
# Unary minus is not defined for character vectors, so the call below raises
# an error. It is wrapped in try() so that the demonstration does not abort
# the rest of the script when the file is sourced. For a descending sort on a
# character column use order(df$City, decreasing = TRUE) instead.
try(df.ordered <- df[order(-df$City), ])
## Error in -df$City: invalid argument to unary operator
# The assignment above never completed, so df.ordered still holds whatever it
# was last set to; the table below is that earlier result, not a sort by City.
print(df.ordered)
## Name City Cruisine Age
## 4 Drake London Lebanese 60
## 1 Cathy Aberdeen European 56
## 2 Vincent London Lebanese 48
## 6 Robert Aberdeen European 48
## 3 Nicholas Southampton European 46
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
# Descending sort on the continuous column Age by negating the values:
# ordering -Age ascending places the largest ages first.
df.ordered <- df[order(-df[["Age"]]), ]
print(df.ordered)
## Name City Cruisine Age
## 4 Drake London Lebanese 60
## 1 Cathy Aberdeen European 56
## 2 Vincent London Lebanese 48
## 6 Robert Aberdeen European 48
## 3 Nicholas Southampton European 46
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
# Inspect the class of City before converting it
class(df[["City"]])
## [1] "character"
# Turn the City column into a factor and confirm the new class
df[["City"]] <- as.factor(df[["City"]])
class(df[["City"]])
## [1] "factor"
# Re-declare the factor with an explicit level order; order() then follows
# this level order rather than alphabetical order.
df[["City"]] <- factor(
  df[["City"]],
  levels = c("London", "Southampton", "Aberdeen")
)
df.ordered <- df[with(df, order(City)), ]
print(df.ordered)
## Name City Cruisine Age
## 2 Vincent London Lebanese 48
## 4 Drake London Lebanese 60
## 5 Karmen London Arab 46
## 8 Ginger London Arab 44
## 3 Nicholas Southampton European 46
## 7 Watson Southampton Arab 42
## 9 Matt Southampton Arab 32
## 10 Merton Southampton European 26
## 1 Cathy Aberdeen European 56
## 6 Robert Aberdeen European 48
# order() yields the positions that would put City in sorted order
order(df[["City"]])
## [1] 2 4 5 8 3 7 9 10 1 6
# sort() yields the sorted City values themselves (here a factor)
sort(df[["City"]])
## [1] London London London London Southampton
## [6] Southampton Southampton Southampton Aberdeen Aberdeen
## Levels: London Southampton Aberdeen