1. apply 对数组或者矩阵行或者列使用函数
apply(X, MARGIN, FUN, ...)
- X:矩阵或数组;
- MARGIN:1表示按行,2表示按列,c(1, 2)表示同时按行和列(即对每个元素);对多维数组还可以指定其他维度,如3或c(1, 3);
- FUN:应用的函数;
- ...:传递给FUN的其他参数;
# Build an 8 x 2 numeric matrix: column x1 is the constant 3 (recycled to
# eight rows), column x2 runs 4, 3, 2, 1 and then 2, 3, 4, 5.
x <- cbind(x1 = 3, x2 = c(rev(1:4), 2:5))
# Label the eight rows "a" through "h".
rownames(x) <- letters[seq_len(8)]
x #x为矩阵
## x1 x2
## a 3 4
## b 3 3
## c 3 2
## d 3 1
## e 3 2
## f 3 3
## g 3 4
## h 3 5
# Column means of matrix x (MARGIN = 2 applies FUN to each column).
col_means <- apply(X = x, MARGIN = 2, FUN = mean)
# Row means of matrix x (MARGIN = 1 applies FUN to each row).
row_means <- apply(X = x, MARGIN = 1, FUN = mean)
# Append the marginal means to the matrix: the row means become the "Rtot"
# column and the column means become the "Ctot" row. The bottom-right cell
# is the sum of the column means. The "Rtot"/"Ctot" labels and the corner
# value are kept unchanged so the result is the same as before the rename.
rbind(cbind(x, Rtot = row_means), Ctot = c(col_means, sum(col_means)))
## x1 x2 Rtot
## a 3 4 3.5
## b 3 3 3.0
## c 3 2 2.5
## d 3 1 2.0
## e 3 2 2.5
## f 3 3 3.0
## g 3 4 3.5
## h 3 5 4.0
## Ctot 3 3 6.0
# 对矩阵x的列进行排序:
apply(X = x, MARGIN = 2, FUN = sort)
## x1 x2
## [1,] 3 1
## [2,] 3 2
## [3,] 3 2
## [4,] 3 3
## [5,] 3 3
## [6,] 3 4
## [7,] 3 4
## [8,] 3 5
# User-defined summary function for use with apply().
# `c1` and `c2` are index vectors into `x`; the result is a length-2 vector
# holding mean(x[c1]) followed by mean(x[c2]).
cave <- function(x, c1, c2) {
  first_mean <- mean(x[c1])
  second_mean <- mean(x[c2])
  c(first_mean, second_mean)
}
apply(X = x, MARGIN = 1, FUN = cave, c1 = "x1", c2 = c("x1", "x2"))
## a b c d e f g h
## [1,] 3.0 3 3.0 3 3.0 3 3.0 3
## [2,] 3.5 3 2.5 2 2.5 3 3.5 4
ma <- matrix(c(1:4, 1, 6:8), nrow = 2)
ma
## [,1] [,2] [,3] [,4]
## [1,] 1 3 1 7
## [2,] 2 4 6 8
apply(X = ma, MARGIN = 1, FUN = table) #结果返回一个list
## [[1]]
##
## 1 3 7
## 2 1 1
##
## [[2]]
##
## 2 4 6 8
## 1 1 1 1
apply(ma, 1, stats::quantile)
## [,1] [,2]
## 0% 1 2.0
## 25% 1 3.5
## 50% 2 5.0
## 75% 4 6.5
## 100% 7 8.0
# 对于3维数组:
z <- array(1:24, dim = 2:4)
z
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
##
## , , 3
##
## [,1] [,2] [,3]
## [1,] 13 15 17
## [2,] 14 16 18
##
## , , 4
##
## [,1] [,2] [,3]
## [1,] 19 21 23
## [2,] 20 22 24
apply(X = z, MARGIN = c(1, 2), FUN = mean)
## [,1] [,2] [,3]
## [1,] 10 12 14
## [2,] 11 13 15
apply(X = z, MARGIN = c(1, 3), FUN = mean)
## [,1] [,2] [,3] [,4]
## [1,] 3 9 15 21
## [2,] 4 10 16 22
apply(X = z, MARGIN = 3, FUN = mean)
## [1] 3.5 9.5 15.5 21.5
2. lapply 对列表或者向量使用函数
lapply(X, FUN, ...)
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE))
x #列表x有3个元素
## $a
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $beta
## [1] 0.04978707 0.13533528 0.36787944 1.00000000 2.71828183 7.38905610
## [7] 20.08553692
##
## $logic
## [1] TRUE FALSE FALSE TRUE
lapply(X = x, FUN = mean) #计算列表x的每个元素的均值
## $a
## [1] 5.5
##
## $beta
## [1] 4.535125
##
## $logic
## [1] 0.5
lapply(X = x, FUN = quantile, probs = 1:3/4) #计算列表的分位数
## $a
## 25% 50% 75%
## 3.25 5.50 7.75
##
## $beta
## 25% 50% 75%
## 0.2516074 1.0000000 5.0536690
##
## $logic
## 25% 50% 75%
## 0.0 0.5 1.0
3. sapply 对列表或者向量使用函数,是lapply的友好版本:默认(simplify = TRUE)会尽量把结果简化为向量或矩阵,无法简化时仍返回列表。
sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE))
sapply(X = x, FUN = mean)
## a beta logic
## 5.500000 4.535125 0.500000
i39 <- sapply(3:9, seq) #生成一个列表
i39
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] 1 2 3 4
##
## [[3]]
## [1] 1 2 3 4 5
##
## [[4]]
## [1] 1 2 3 4 5 6
##
## [[5]]
## [1] 1 2 3 4 5 6 7
##
## [[6]]
## [1] 1 2 3 4 5 6 7 8
##
## [[7]]
## [1] 1 2 3 4 5 6 7 8 9
sapply(i39, fivenum) #统计列表每个元素的五数总括
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.0 1.0 1 1.0 1.0 1.0 1
## [2,] 1.5 1.5 2 2.0 2.5 2.5 3
## [3,] 2.0 2.5 3 3.5 4.0 4.5 5
## [4,] 2.5 3.5 4 5.0 5.5 6.5 7
## [5,] 3.0 4.0 5 6.0 7.0 8.0 9
4. vapply 对列表或者向量使用函数,与sapply类似,但必须通过FUN.VALUE预先指定每次调用返回值的类型和长度,结果不符时会报错,因此更安全
vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
i39 <- sapply(3:9, seq) # list of vectors
sapply(i39, fivenum)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.0 1.0 1 1.0 1.0 1.0 1
## [2,] 1.5 1.5 2 2.0 2.5 2.5 3
## [3,] 2.0 2.5 3 3.5 4.0 4.5 5
## [4,] 2.5 3.5 4 5.0 5.5 6.5 7
## [5,] 3.0 4.0 5 6.0 7.0 8.0 9
vapply(i39, fivenum, c(Min. = 0, `1st Qu.` = 0, Median = 0, `3rd Qu.` = 0, Max. = 0))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## Min. 1.0 1.0 1 1.0 1.0 1.0 1
## 1st Qu. 1.5 1.5 2 2.0 2.5 2.5 3
## Median 2.0 2.5 3 3.5 4.0 4.5 5
## 3rd Qu. 2.5 3.5 4 5.0 5.5 6.5 7
## Max. 3.0 4.0 5 6.0 7.0 8.0 9
5. tapply 对不规则数组(ragged array)使用函数,即按INDEX给出的因子对X分组,再对每个分组应用FUN
tapply(X, INDEX, FUN = NULL, ..., simplify = TRUE)
# Draw 5 random binomial values and turn them into a grouping factor.
# `n = 5` is matched by name, so the unnamed 32 fills the next formal of
# rbinom() (size). No seed is set, so the values differ from run to run.
groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
groups
## [1] 13 14 14 5 7
## Levels: 5 7 13 14
tapply(groups, groups, length) #- is almost the same as
## 5 7 13 14
## 1 1 1 2
table(groups)
## groups
## 5 7 13 14
## 1 1 1 2
head(warpbreaks)
## breaks wool tension
## 1 26 A L
## 2 30 A L
## 3 54 A L
## 4 25 A L
## 5 70 A L
## 6 52 A L
tapply(warpbreaks$breaks, warpbreaks[, -1], sum)
## tension
## wool L M H
## A 401 216 221
## B 254 259 169
tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
## tension
## L M H
## 655 475 390
# Number of observations in the example.
n <- 17
# Grouping factor that cycles through 1, 2, 3 for n observations while
# declaring levels 1..5, so levels 4 and 5 stay empty. `length.out` is
# spelled out in full instead of relying on partial matching of `length`.
fac <- factor(rep(1:3, length.out = n), levels = 1:5)
fac
## [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2
## Levels: 1 2 3 4 5
table(fac)
## fac
## 1 2 3 4 5
## 6 6 5 0 0
tapply(1:n, fac, sum)
## 1 2 3 4 5
## 51 57 45 NA NA
tapply(1:n, fac, sum, simplify = FALSE)
## $`1`
## [1] 51
##
## $`2`
## [1] 57
##
## $`3`
## [1] 45
##
## $`4`
## NULL
##
## $`5`
## NULL
tapply(1:n, fac, range)
## $`1`
## [1] 1 16
##
## $`2`
## [1] 2 17
##
## $`3`
## [1] 3 15
##
## $`4`
## NULL
##
## $`5`
## NULL
tapply(1:n, fac, quantile)
## $`1`
## 0% 25% 50% 75% 100%
## 1.00 4.75 8.50 12.25 16.00
##
## $`2`
## 0% 25% 50% 75% 100%
## 2.00 5.75 9.50 13.25 17.00
##
## $`3`
## 0% 25% 50% 75% 100%
## 3 6 9 12 15
##
## $`4`
## NULL
##
## $`5`
## NULL
6. mapply 对多个列表或者向量参数使用函数
mapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)
mapply(FUN = rep, x = 1:4, times = 4:1) #结果返回一个列表
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
rep(x = 1:4, times = 4:1) #结果返回一个向量
## [1] 1 1 1 1 2 2 2 3 3 4
mapply(FUN = rep, times = 1:4, x = 4:1)
## [[1]]
## [1] 4
##
## [[2]]
## [1] 3 3
##
## [[3]]
## [1] 2 2 2
##
## [[4]]
## [1] 1 1 1 1
mapply(rep, times = 1:4, MoreArgs = list(x = 42))
## [[1]]
## [1] 42
##
## [[2]]
## [1] 42 42
##
## [[3]]
## [1] 42 42 42
##
## [[4]]
## [1] 42 42 42 42