本文档描述R中apply函数族的用法。
apply(array, margin, function, ...)
Applies a function to sections of an array and returns the results in an array.
m <- matrix(1:16, ncol = 4)
print(m)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
apply(m, 1, sum) # 按行
[1] 28 32 36 40
apply(m, 2, sum) # 按列
[1] 10 26 42 58
apply(m, 1, function(x) sum(x) + 2) # 使用自定义(匿名)函数
[1] 30 34 38 42
lapply(list, function, ...)
Applies a function to elements in a list or a vector and returns the results in a list.
df1 <- data.frame(m)
print(df1)
X1 X2 X3 X4
1 1 5 9 13
2 2 6 10 14
3 3 7 11 15
4 4 8 12 16
lapply(df1, sum)
$X1
[1] 10
$X2
[1] 26
$X3
[1] 42
$X4
[1] 58
# 使用lapply代替显式循环
lapply(1:5, function(x) x^2)
[[1]]
[1] 1
[[2]]
[1] 4
[[3]]
[1] 9
[[4]]
[1] 16
[[5]]
[1] 25
sapply(list, function, ..., simplify = TRUE)
Applies a function to elements in a list and returns the results in a vector, matrix or a list.
# 返回向量
sapply(df1, sum)
X1 X2 X3 X4
10 26 42 58
# 若function返回向量,则sapply返回矩阵
sapply(df1, range)
X1 X2 X3 X4
[1,] 1 5 9 13
[2,] 4 8 12 16
vapply(list, function, function.value, ...)
# vapply指定返回值的类型,若与指定的类型不符就会报错
vapply(df1, fivenum, c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
X1 X2 X3 X4
Min. 1.0 5.0 9.0 13.0
1st Qu. 1.5 5.5 9.5 13.5
Median 2.5 6.5 10.5 14.5
3rd Qu. 3.5 7.5 11.5 15.5
Max. 4.0 8.0 12.0 16.0
vapply(df1, fivenum, c(Min. = "", "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
Error in vapply(df1, fivenum, c(Min. = "", `1st Qu.` = 0, Median = 0, : 值的种类必需是'character',
但FUN(X[[1]])结果的种类却是'double'
tapply(array, indices, function, ..., simplify = TRUE)
Applies a function to each cell of a ragged array.
x1 <- runif(16)
index1 <- rep(1:4, 4) # 分组变量1
index2 <- c(rep(1, 8), rep(2, 8)) # 分组变量2
df2 <- data.frame(x1, index1, index2)
print(df2)
x1 index1 index2
1 0.97019462 1 1
2 0.98782881 2 1
3 0.95165966 3 1
4 0.88527474 4 1
5 0.34325478 1 1
6 0.66533882 2 1
7 0.62725130 3 1
8 0.34001986 4 1
9 0.31779573 1 2
10 0.20940271 2 2
11 0.05079445 3 2
12 0.72363907 4 2
13 0.45179433 1 2
14 0.11114236 2 2
15 0.71166645 3 2
16 0.33201999 4 2
tapply(df2$x1, df2$index1, mean) # 按分组变量1求各组均值
1 2 3 4
0.5207599 0.4934282 0.5853430 0.5702384
tapply(df2$x1, list(df2$index1, df2$index2), mean) # 按分组变量1和分组变量2的交叉求各组均值
1 2
1 0.6567247 0.3847950
2 0.8265838 0.1602725
3 0.7894555 0.3812304
4 0.6126473 0.5278295
mapply(function, ..., MoreArgs = NULL, simplify = TRUE)
Apply a function to multiple list or vector arguments.
mapply(rep, 1:5, 5:1)
[[1]]
[1] 1 1 1 1 1
[[2]]
[1] 2 2 2 2
[[3]]
[1] 3 3 3
[[4]]
[1] 4 4
[[5]]
[1] 5
mapply(sum, 1:5, MoreArgs = list(x = 10))
[1] 11 12 13 14 15
sweep(array, margin, stats, function, ...)
Returns an array like the input array with stats swept out.
colMeans(df1) # df1每列均值
X1 X2 X3 X4
2.5 6.5 10.5 14.5
sweep(df1, 2, colMeans(df1), "-") # df1每列数据减去当列均值
X1 X2 X3 X4
1 -1.5 -1.5 -1.5 -1.5
2 -0.5 -0.5 -0.5 -0.5
3 0.5 0.5 0.5 0.5
4 1.5 1.5 1.5 1.5
by(data.frame, indices, function, ..., simplify = TRUE)
by() is an object-oriented wrapper for tapply applied to data frames.
# by是tapply的面向对象版本
by(df2$x1, list(df2$index1, df2$index2), mean)
: 1
: 1
[1] 0.6567247
--------------------------------------------------------
: 2
: 1
[1] 0.8265838
--------------------------------------------------------
: 3
: 1
[1] 0.7894555
--------------------------------------------------------
: 4
: 1
[1] 0.6126473
--------------------------------------------------------
: 1
: 2
[1] 0.384795
--------------------------------------------------------
: 2
: 2
[1] 0.1602725
--------------------------------------------------------
: 3
: 2
[1] 0.3812304
--------------------------------------------------------
: 4
: 2
[1] 0.5278295
aggregate(data.frame, indices, function, ..., simplify = TRUE)
Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.
# aggregate可以使用返回值为多值的函数
aggregate(df2$x1, list(df2$index1, df2$index2), fivenum)
Group.1 Group.2 x.1 x.2 x.3 x.4 x.5
1 1 1 0.34325478 0.34325478 0.65672470 0.97019462 0.97019462
2 2 1 0.66533882 0.66533882 0.82658381 0.98782881 0.98782881
3 3 1 0.62725130 0.62725130 0.78945548 0.95165966 0.95165966
4 4 1 0.34001986 0.34001986 0.61264730 0.88527474 0.88527474
5 1 2 0.31779573 0.31779573 0.38479503 0.45179433 0.45179433
6 2 2 0.11114236 0.11114236 0.16027253 0.20940271 0.20940271
7 3 2 0.05079445 0.05079445 0.38123045 0.71166645 0.71166645
8 4 2 0.33201999 0.33201999 0.52782953 0.72363907 0.72363907