Sean Wen
15th January 2019
head(mtcars)## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Modify the dataset slightly for our session
df <- data.frame("Model"=row.names(mtcars), mtcars, stringsAsFactors=FALSE)
row.names(df) <- NULL
head(df)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Select one rows (and all columns)
df.small <- df[1, ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4
# Select several contiguous rows (and all columns)
df.small <- df[1:2, ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
# Select several non-contiguous rows (and all columns)
df.small <- df[c(1,3, 6), ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.62 16.46 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
## 6 Valiant 18.1 6 225 105 2.76 3.46 20.22 1 0 3 1
# Select one column (and all rows)
df.small <- df[, 1, drop=FALSE]
head(df.small)## Model
## 1 Mazda RX4
## 2 Mazda RX4 Wag
## 3 Datsun 710
## 4 Hornet 4 Drive
## 5 Hornet Sportabout
## 6 Valiant
# Select several contiguous columns (and all rows)
df.small <- df[, 1:2]
head(df.small)## Model mpg
## 1 Mazda RX4 21.0
## 2 Mazda RX4 Wag 21.0
## 3 Datsun 710 22.8
## 4 Hornet 4 Drive 21.4
## 5 Hornet Sportabout 18.7
## 6 Valiant 18.1
# Select several non-contiguous columns (and all rows)
df.small <- df[, c(1,3, 6)]
head(df.small)## Model cyl drat
## 1 Mazda RX4 6 3.90
## 2 Mazda RX4 Wag 6 3.90
## 3 Datsun 710 4 3.85
## 4 Hornet 4 Drive 6 3.08
## 5 Hornet Sportabout 8 3.15
## 6 Valiant 6 2.76
# Select one rows and column
df.small <- df[1, 1]
df.small## [1] "Mazda RX4"
# Select several contiguous rows and columns
df.small <- df[1:2, 1:2]
print(df.small)## Model mpg
## 1 Mazda RX4 21
## 2 Mazda RX4 Wag 21
# Select several non-contiguous rows and columns
df.small <- df[c(1,3, 6), c(1,3, 6)]
print(df.small)## Model cyl drat
## 1 Mazda RX4 6 3.90
## 3 Datsun 710 4 3.85
## 6 Valiant 6 2.76
# Create vector of characters as an example
random <- sample(c("A", "B", "C"), size=10, replace=TRUE)
random## [1] "A" "B" "B" "B" "B" "C" "B" "C" "A" "C"
# Perform logical test. TRUE indicates the specified condition is met
random=="A"## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
# which() indicates the order number which corresponds to TRUE
which(random=="A")## [1] 1 9
# Using the square bracket notation, we subset our desired character
random[which(random=="A")]## [1] "A" "A"
df.small <- df[which(df$Model=="Datsun 710"), ]# Using AND operator
df.small <- df[which(df$cyl==6 & df$disp <= 160), ]
dim(df.small)## [1] 3 12
# Using OR operator
df.small <- df[which(df$cyl==6 | df$disp <= 160), ]
dim(df.small)## [1] 18 12
# First specific your conditions
conditions <- c("Hornet 4 Drive", "Valiant", "Merc 280", "onda Civic", "Lotus Europa")
# Subset base on your specified conditions
df.small <- df[which(df$Model %in% conditions), ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
df.small <- subset(df, subset=(Model=="Datsun 710"), )
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
# Using AND operator
df.small <- subset(df, subset=(cyl==6 & disp <= 160), )
dim(df.small)## [1] 3 12
# Using OR operator
df.small <- subset(df, subset=(cyl==6 | disp <= 160), )
dim(df.small)## [1] 18 12
# Subset every Madza model
df.small <- df[grep("Mazda", df$Model), ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
# Subset every Merc model
df.small <- df[grep("Merc", df$Model), ]
print(df.small)## Model mpg cyl disp hp drat wt qsec vs am gear carb
## 8 Merc 240D 24.4 4 146.7 62 3.69 3.19 20.0 1 0 4 2
## 9 Merc 230 22.8 4 140.8 95 3.92 3.15 22.9 1 0 4 2
## 10 Merc 280 19.2 6 167.6 123 3.92 3.44 18.3 1 0 4 4
## 11 Merc 280C 17.8 6 167.6 123 3.92 3.44 18.9 1 0 4 4
## 12 Merc 450SE 16.4 8 275.8 180 3.07 4.07 17.4 0 0 3 3
## 13 Merc 450SL 17.3 8 275.8 180 3.07 3.73 17.6 0 0 3 3
## 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.78 18.0 0 0 3 3
# Create vector of characters as an example
random <- sample(c("Alice", "Allen", "Adam"), size=10, replace=TRUE)
random## [1] "Adam" "Allen" "Alice" "Allen" "Alice" "Adam" "Adam" "Allen"
## [9] "Adam" "Alice"
# grep() indicates the order number which corresponds to TRUE
grep("Ad", random)## [1] 1 6 7 9
# Using the square bracket notation, we subset our desired character
random[grep("Ad", random)]## [1] "Adam" "Adam" "Adam" "Adam"
# Subsetting one column
df.small <- df[, "Model", drop=FALSE]
head(df.small)## Model
## 1 Mazda RX4
## 2 Mazda RX4 Wag
## 3 Datsun 710
## 4 Hornet 4 Drive
## 5 Hornet Sportabout
## 6 Valiant
# Subsetting multiple columns
df.small <- df[, c("Model", "wt")]
head(df.small)## Model wt
## 1 Mazda RX4 2.620
## 2 Mazda RX4 Wag 2.875
## 3 Datsun 710 2.320
## 4 Hornet 4 Drive 3.215
## 5 Hornet Sportabout 3.440
## 6 Valiant 3.460
# First specific your conditions
conditions <- c("Model", "disp", "vs", "am", "gear")
# Subset base on your specified conditions
df.small <- df[, names(df.small) %in% conditions]
head(df.small)## Model cyl hp wt vs gear
## 1 Mazda RX4 6 110 2.620 0 4
## 2 Mazda RX4 Wag 6 110 2.875 0 4
## 3 Datsun 710 4 93 2.320 1 4
## 4 Hornet 4 Drive 6 110 3.215 1 3
## 5 Hornet Sportabout 8 175 3.440 0 3
## 6 Valiant 6 105 3.460 1 3
# Subset columns begining with c
df.small <- df[, grep("^c", names(df))]
head(df.small)## cyl carb
## 1 6 4
## 2 6 4
## 3 4 1
## 4 6 1
## 5 8 2
## 6 6 1
df.small <- df[grep("Merc", df$Model), c("Model", "wt")]
head(df.small)## Model wt
## 8 Merc 240D 3.19
## 9 Merc 230 3.15
## 10 Merc 280 3.44
## 11 Merc 280C 3.44
## 12 Merc 450SE 4.07
## 13 Merc 450SL 3.73