Functions covered:
References
Before explaining the apply family functions, here are four related functions that will save you lots of time and will help you understand the apply family functions. They are rowSums, colSums, rowMeans, and colMeans.
dat <- matrix(rnorm(n = 15, mean = 1000, sd = 300), nrow = 3, ncol = 5) #create a 3 by 5 matrix
dat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1063.3860 828.3269 1389.4636 890.3890 1155.828
## [2,] 855.7374 956.7276 871.5523 350.2771 1042.304
## [3,] 896.1662 1280.5432 830.1753 1172.5095 1400.434
Sum of each row:
rowSums(dat) #3 rows
## [1] 5327.393 4076.598 5579.828
Sum of each column:
colSums(dat) #5 columns
## [1] 2815.290 3065.598 3091.191 2413.176 3598.566
Mean of each row:
rowMeans(dat) #3 rows
## [1] 1065.4786 815.3196 1115.9657
Mean of each column:
colMeans(dat) #5 columns
## [1] 938.4299 1021.8659 1030.3971 804.3919 1199.5219
apply applies a function to each row or column of a matrix and, for functions such as mean, returns a vector:
dat <- matrix(rnorm(n = 15, mean = 100, sd = 30), nrow = 3, ncol = 5) #create a 3 by 5 matrix
dat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 164.1750 86.9395 106.95664 107.49572 117.00086
## [2,] 106.6604 100.4944 39.89724 86.59982 20.47244
## [3,] 124.9766 137.9387 120.85706 141.07982 88.77707
Applying function to each row (row = margin 1):
apply(dat, 1, mean) #find mean of each row in dat
## [1] 116.51353 70.82487 122.72586
rowMeans(dat) #same output as above
## [1] 116.51353 70.82487 122.72586
Applying function to each column (column = margin 2):
apply(dat, 2, mean) #find mean of each column in dat
## [1] 131.93735 108.45753 89.23698 111.72512 75.41679
colMeans(dat) #same output as above
## [1] 131.93735 108.45753 89.23698 111.72512 75.41679
#create character vector with city names
cities <- c("New York", "London", "Cape Town")
#for each element in X, apply function nchar() to count number of characters
lapply(X = cities, FUN = nchar)
## [[1]]
## [1] 8
##
## [[2]]
## [1] 6
##
## [[3]]
## [1] 9
To return values as vector, use unlist:
unlist(lapply(X = cities, FUN = nchar))
## [1] 8 6 9
lapply is similar to the verbose for loop:
#count the characters of each city name with an explicit loop
#preallocate the result (nchar() returns integers) instead of growing it from c()
numChars <- integer(length(cities))
#seq_along() yields no iterations when cities is empty, unlike 1:length(cities)
for (i in seq_along(cities)) {
  numChars[i] <- nchar(cities[i])
}
numChars
## [1] 8 6 9
num <- list(a = c(1:3), b = c(2:4), c = c(3:5))
num
## $a
## [1] 1 2 3
##
## $b
## [1] 2 3 4
##
## $c
## [1] 3 4 5
#for each of the three elements in the list, calculate the sum
lapply(num, sum)
## $a
## [1] 6
##
## $b
## [1] 9
##
## $c
## [1] 12
#for each element in the list, select the third element
lapply(num, function(z) {z[3]})
## $a
## [1] 3
##
## $b
## [1] 4
##
## $c
## [1] 5
#unlist output
unlist(lapply(num, function(z) {z[3]}))
## a b c
## 3 4 5
#a list of temperature measurements for 5 days
temp <- list(
c(3, 7, 9, 6, -1), #temperatures for day 1
c(6, 9, 12, 13, 5), #temperatures for day 2
c(4, 8, 3, -1, -3), #temperatures for day 3
c(1, 4, 7, 2, -2), #temperatures for day 4
c(5, 7, 9, 4, 2) #temperatures for day 5
)
sapply(temp, min) #find min for each day (returns a vector)
## [1] -1 5 -3 -2 2
Compare the output of sapply with lapply:
lapply(temp, min) #find min for each day (returns a list)
## [[1]]
## [1] -1
##
## [[2]]
## [1] 5
##
## [[3]]
## [1] -3
##
## [[4]]
## [1] -2
##
## [[5]]
## [1] 2
To get a vector when we use lapply, we need to use unlist:
unlist(lapply(temp, min)) #sapply returns exactly the same output!
## [1] -1 5 -3 -2 2
What if the function supplied to sapply returns more than one value?
#return the smallest and largest value of x as a named vector
#x: a vector accepted by min() and max()
#returns: c(min = <smallest value>, max = <largest value>)
minAndMax <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}
sapply(temp, minAndMax) #see how sapply returns a nicely formatted matrix
## [,1] [,2] [,3] [,4] [,5]
## min -1 5 -3 -2 2
## max 9 13 8 7 9
Compare sapply with lapply:
lapply(temp, minAndMax)
## [[1]]
## min max
## -1 9
##
## [[2]]
## min max
## 5 13
##
## [[3]]
## min max
## -3 8
##
## [[4]]
## min max
## -2 7
##
## [[5]]
## min max
## 2 9
Even if you use unlist on the output from lapply, it won’t be nice:
unlist(lapply(temp, minAndMax))
## min max min max min max min max min max
## -1 9 5 13 -3 8 -2 7 2 9
What happens when the function supplied to sapply returns outputs of different lengths?
#return the values of x that are below 0
#x: a numeric vector (may contain NA)
#returns: the elements of x that are < 0, in their original order;
#         a zero-length vector if there are none
belowZero <- function(x) {
  #which() drops NA comparisons, so missing values are not reported as below zero
  x[which(x < 0)]
}
sapply returns a list:
sapply(temp, belowZero)
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
##
## [[4]]
## [1] -2
##
## [[5]]
## numeric(0)
which is the same as the output from lapply
identical(sapply(temp, belowZero), lapply(temp, belowZero))
## [1] TRUE
lapply(temp, belowZero) #exactly the same as sapply
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
##
## [[4]]
## [1] -2
##
## [[5]]
## numeric(0)
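vapply is similar to sapply in terms of functionality, but you must also specify the type and length of the value expected for each element:
cities <- c("New York", "London", "Cape Town")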
vapply(cities, nchar, numeric(1)) #tells vapply we're expecting 1 number for each element
## New York London Cape Town
## 8 6 9
If you specify numeric(2), R will return an error message.
dat <- list(a = 1:10, b = 11:20)
#for each element, return the minimum and maximum;
#FUN.VALUE tells vapply to expect two named numbers per element
vapply(
  dat,
  function(x) c(min = min(x), max = max(x)),
  FUN.VALUE = c(min = 0, max = 0)
)
## a b
## min 1 11
## max 10 20
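mapply is a multivariate version of sapply: it applies a function (the first argument of mapply) to each element of several vectors in parallel:
mapply(sum, 1:5, 1:5, 1:5)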
## [1] 3 6 9 12 15
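What mapply(sum, 1:5, 1:5, 1:5) does is apply the function to the first elements of each vector, then to the second elements of each, then to the third elements of each, and so on. So mapply(sum, 1:5, 1:5, 1:5) is equivalent to:
c(sum(1, 1, 1), sum(2, 2, 2), sum(3, 3, 3), sum(4, 4, 4), sum(5, 5, 5))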
Another example:
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
Repeat 1 four times, 2 three times, 3 two times, and 4 one time. mapply(rep, 1:4, 4:1) is equivalent to:
list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
rapply is so named because it applies a function recursively to every element of a (possibly nested) list:
dat <- list(a = list("A", "B", "C"), b = c(1, 100), c = list("Hey"))
dat
## $a
## $a[[1]]
## [1] "A"
##
## $a[[2]]
## [1] "B"
##
## $a[[3]]
## [1] "C"
##
##
## $b
## [1] 1 100
##
## $c
## $c[[1]]
## [1] "Hey"
#create custom function to supply to rapply
#transform one element of a list
#x: a vector; character input and non-character input are handled differently
#returns: x with "!" appended if x is character, otherwise x + 1000
myFunction <- function(x) {
  #non-character elements: add 1000 and return early
  if (!is.character(x)) {
    return(x + 1000)
  }
  #character elements: append an exclamation mark
  paste0(x, "!")
}
rapply(dat, myFunction)
## a1 a2 a3 b1 b2 c
## "A!" "B!" "C!" "1001" "1100" "Hey!"
x <- c(1, 2, 3, 10, 20, 30, 100, 200, 300) #create vector
groups <- c("a", "a", "a", "b", "b", "b", "c", "c", "c") #create grouping variable (3 groups)
tapply(x, groups, mean)
## a b c
## 2 20 200
In essence, tapply has done this:
c(a = mean(c(1, 2, 3)), #mean of group a
b = mean(c(10, 20, 30)), #mean of group b
c = mean(c(100, 200, 300)) #mean of group c
)
## a b c
## 2 20 200
Another example with the ChickWeight dataset included in R:
head(ChickWeight)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
table(ChickWeight$Diet) #four types of diets (1, 2, 3, 4)
##
## 1 2 3 4
## 220 120 120 118
Use tapply to find mean weight for each type of diet:
tapply(ChickWeight$weight, ChickWeight$Diet, mean)
## 1 2 3 4
## 102.6455 122.6167 142.9500 135.2627