Functions covered:

References

Row and column functions

Before explaining the apply family functions, here are four related functions that will save you lots of time and will help you understand the apply family functions. They are rowSums, colSums, rowMeans, and colMeans.

dat <- matrix(rnorm(n = 15, mean = 1000, sd = 300), nrow = 3, ncol = 5) #create a 3 by 5 matrix 
dat
##           [,1]      [,2]      [,3]      [,4]     [,5]
## [1,] 1063.3860  828.3269 1389.4636  890.3890 1155.828
## [2,]  855.7374  956.7276  871.5523  350.2771 1042.304
## [3,]  896.1662 1280.5432  830.1753 1172.5095 1400.434

Sum of each row:

rowSums(dat) #3 rows
## [1] 5327.393 4076.598 5579.828

Sum of each column:

colSums(dat) #5 columns
## [1] 2815.290 3065.598 3091.191 2413.176 3598.566

Mean of each row:

rowMeans(dat) #3 rows
## [1] 1065.4786  815.3196 1115.9657

Mean of each column:

colMeans(dat) #5 columns
## [1]  938.4299 1021.8659 1030.3971  804.3919 1199.5219

apply

dat <- matrix(rnorm(n = 15, mean = 100, sd = 30), nrow = 3, ncol = 5) #create a 3 by 5 matrix
dat
##          [,1]     [,2]      [,3]      [,4]      [,5]
## [1,] 164.1750  86.9395 106.95664 107.49572 117.00086
## [2,] 106.6604 100.4944  39.89724  86.59982  20.47244
## [3,] 124.9766 137.9387 120.85706 141.07982  88.77707

Applying function to each row (row = margin 1):

apply(dat, 1, mean) #find mean of each row in dat
## [1] 116.51353  70.82487 122.72586
rowMeans(dat) #same output as above
## [1] 116.51353  70.82487 122.72586

Applying function to each column (column = margin 2):

apply(dat, 2, mean) #find mean of each column in dat
## [1] 131.93735 108.45753  89.23698 111.72512  75.41679
colMeans(dat) #same output as above
## [1] 131.93735 108.45753  89.23698 111.72512  75.41679

lapply

Using lapply on a vector

#create character vector with city names
cities <- c("New York", "London", "Cape Town")
#for each element in X, apply function nchar() to count number of characters
lapply(X = cities, FUN = nchar)
## [[1]]
## [1] 8
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 9

To return values as vector, use unlist:

unlist(lapply(X = cities, FUN = nchar))
## [1] 8 6 9

lapply does the same job as the following, more verbose, for loop:

#preallocate the result instead of growing it from c() on every iteration
numChars <- integer(length(cities))
#seq_along() yields an empty sequence for an empty vector, whereas 1:length(cities)
#would iterate over 1 and 0 and store nchar(NA)
for (i in seq_along(cities)) {
    numChars[i] <- nchar(cities[i]) #count the characters of the i-th city
}
numChars
## [1] 8 6 9

Using lapply on a list

num <- list(a = c(1:3), b = c(2:4), c = c(3:5))
num
## $a
## [1] 1 2 3
## 
## $b
## [1] 2 3 4
## 
## $c
## [1] 3 4 5
#for each of the three elements in the list, calculate the sum
lapply(num, sum)
## $a
## [1] 6
## 
## $b
## [1] 9
## 
## $c
## [1] 12
#for each element in the list, select the third element
lapply(num, function(z) {z[3]})
## $a
## [1] 3
## 
## $b
## [1] 4
## 
## $c
## [1] 5
#unlist output
unlist(lapply(num, function(z) {z[3]}))
## a b c 
## 3 4 5

sapply

#a list of temperature measurements for 5 days
temp <- list( 
    c(3, 7, 9, 6, -1), #temperatures for day 1
    c(6, 9, 12, 13, 5), #temperatures for day 2
    c(4, 8, 3, -1, -3), #temperatures for day 3
    c(1, 4, 7, 2, -2), #temperatures for day 4
    c(5, 7, 9, 4, 2) #temperatures for day 5
)
sapply(temp, min) #find min for each day (returns a vector)
## [1] -1  5 -3 -2  2

Compare the output of sapply with lapply:

lapply(temp, min) #find min for each day (returns a list)
## [[1]]
## [1] -1
## 
## [[2]]
## [1] 5
## 
## [[3]]
## [1] -3
## 
## [[4]]
## [1] -2
## 
## [[5]]
## [1] 2

To get a vector when we use lapply, we need to use unlist:

unlist(lapply(temp, min)) #sapply returns exactly the same output!
## [1] -1  5 -3 -2  2

What if the function supplied to sapply returns more than one value?

minAndMax <- function(x) { #returns the min and max of x as a named vector
    lowest <- min(x)
    highest <- max(x)
    c(min = lowest, max = highest) #the names become the row names in sapply's matrix
}
sapply(temp, minAndMax) #see how sapply returns a nicely formatted matrix
##     [,1] [,2] [,3] [,4] [,5]
## min   -1    5   -3   -2    2
## max    9   13    8    7    9

Compare sapply with lapply:

lapply(temp, minAndMax)
## [[1]]
## min max 
##  -1   9 
## 
## [[2]]
## min max 
##   5  13 
## 
## [[3]]
## min max 
##  -3   8 
## 
## [[4]]
## min max 
##  -2   7 
## 
## [[5]]
## min max 
##   2   9

Even if you use unlist on the output from lapply, it won’t be nice:

unlist(lapply(temp, minAndMax))
## min max min max min max min max min max 
##  -1   9   5  13  -3   8  -2   7   2   9

What happens when the function supplied to sapply returns outputs of different lengths?

belowZero <- function(x) { #create a function that returns only the values below 0
    isNegative <- x < 0 #logical mask, TRUE where the value is negative
    x[isNegative]
}

sapply returns a list:

sapply(temp, belowZero)
## [[1]]
## [1] -1
## 
## [[2]]
## numeric(0)
## 
## [[3]]
## [1] -1 -3
## 
## [[4]]
## [1] -2
## 
## [[5]]
## numeric(0)

This is the same as the output from lapply:

identical(sapply(temp, belowZero), lapply(temp, belowZero))
## [1] TRUE
lapply(temp, belowZero) #exactly the same as sapply
## [[1]]
## [1] -1
## 
## [[2]]
## numeric(0)
## 
## [[3]]
## [1] -1 -3
## 
## [[4]]
## [1] -2
## 
## [[5]]
## numeric(0)

vapply

cities <- c("New York", "London", "Cape Town")
vapply(cities, nchar, numeric(1)) #tells vapply we're expecting 1 number for each element
##  New York    London Cape Town 
##         8         6         9

If you specify numeric(2) instead, R will throw an error, because nchar returns a single value for each element, not two.

dat <- list(a = 1:10, b = 11:20)
#the third argument, c(min = 0, max = 0), is a template: it tells vapply that each result is a named numeric vector of length 2
vapply(dat, function(x) {return(c(min = min(x), max = max(x)))}, c(min = 0, max = 0)) #returns minimum and maximum
##      a  b
## min  1 11
## max 10 20

mapply

mapply(sum, 1:5, 1:5, 1:5) #sum the i-th elements of the three vectors, for i = 1 to 5
## [1]  3  6  9 12 15

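What mapply(sum, 1:5, 1:5, 1:5) does is apply a function to the first elements of each argument, then the second elements of each, then the third elements of each, and so on. So mapply(sum, 1:5, 1:5, 1:5) is equivalent to c(sum(1, 1, 1), sum(2, 2, 2), sum(3, 3, 3), sum(4, 4, 4), sum(5, 5, 5)).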

Another example:

mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4

Repeat 1 four times, 2 three times, 3 two times, and 4 one time. mapply(rep, 1:4, 4:1) is equivalent to:

list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4

rapply

dat <- list(a = list("A", "B", "C"), b = c(1, 100), c = list("Hey"))
dat
## $a
## $a[[1]]
## [1] "A"
## 
## $a[[2]]
## [1] "B"
## 
## $a[[3]]
## [1] "C"
## 
## 
## $b
## [1]   1 100
## 
## $c
## $c[[1]]
## [1] "Hey"
#custom function to supply to rapply: appends "!" to character elements and adds 1000 to everything else
myFunction <- function(x) {
    if (!is.character(x)) {
        return(x + 1000) #element isn't a character, so add 1000 to it
    }
    paste0(x, "!") #element is a character, so add !
}
rapply(dat, myFunction) #results are flattened into a single vector, so the numbers are coerced to character
##     a1     a2     a3     b1     b2      c 
##   "A!"   "B!"   "C!" "1001" "1100" "Hey!"

tapply

x <- c(1, 2, 3, 10, 20, 30, 100, 200, 300) #create vector
groups <- c("a", "a", "a", "b", "b", "b", "c", "c", "c") #create grouping variable (3 groups)
tapply(x, groups, mean)
##   a   b   c 
##   2  20 200

In essence, tapply has done this:

c(a = mean(c(1, 2, 3)), #mean of group a
  b = mean(c(10, 20, 30)), #mean of group b
  c = mean(c(100, 200, 300)) #mean of group c
  )
##   a   b   c 
##   2  20 200

Another example with the ChickWeight dataset included in R:

head(ChickWeight)
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
table(ChickWeight$Diet) #four types of diets (1, 2, 3, 4)
## 
##   1   2   3   4 
## 220 120 120 118

Use tapply to find mean weight for each type of diet:

tapply(ChickWeight$weight, ChickWeight$Diet, mean)
##        1        2        3        4 
## 102.6455 122.6167 142.9500 135.2627

by

aggregate

replicate