本週作業與進度:
1. 研讀eapply()
2. 研讀rapply()
3. 研讀 Advanced R: Functional programming
4. 研讀 Advanced R: Functionals
set.seed(seed = 100)
randomize <- function(f) f(runif(1e3))
randomize(mean)
## [1] 0.5180817
randomize(sd)
## [1] 0.2929122
randomize(median)
## [1] 0.5057337
g <- function(f, x) f(x)
g(f = max, c(3, 1, 5))
## [1] 5
h <- function(f, ...) f(...)
h(f = max, c(3, 1, 5))
## [1] 5
h(f = mean, c(1, 3, 4, NA, 4), na.rm = TRUE)
## [1] 3
?mtcars
class(mtcars)
## [1] "data.frame"
my_mtcar <- as.matrix(mtcars)
class(my_mtcar)
## [1] "matrix"
apply(X = my_mtcar, MARGIN = 2, mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
apply(X = my_mtcar, MARGIN = 2, max)
## mpg cyl disp hp drat wt qsec vs am gear
## 33.900 8.000 472.000 335.000 4.930 5.424 22.900 1.000 1.000 5.000
## carb
## 8.000
apply(X = my_mtcar, MARGIN = 1, function(v) colnames(mtcars)[which.max(v)])
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## "disp" "disp" "disp" "disp"
## Hornet Sportabout Valiant Duster 360 Merc 240D
## "disp" "disp" "disp" "disp"
## Merc 230 Merc 280 Merc 280C Merc 450SE
## "disp" "disp" "disp" "disp"
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## "disp" "disp" "disp" "disp"
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## "disp" "disp" "disp" "disp"
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## "disp" "disp" "disp" "disp"
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## "disp" "disp" "disp" "hp"
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## "disp" "hp" "hp" "disp"
m = matrix(1:6, nrow = 3)
m
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
f <- function(x) x/c(2, 4)
apply(X = m, MARGIN = 1, f)
## [,1] [,2] [,3]
## [1,] 0.5 1.00 1.5
## [2,] 1.0 1.25 1.5
# apply(m, 2, f)
?lapply
set.seed(seed = 123)
d <- list(rnorm(n = 30, mean = 5, sd = 10), rnorm(n = 50, mean = 5, sd = 10), rnorm(n = 100, mean = 8, sd = 15))
lapply(X = d, mean)
## [[1]]
## [1] 4.528962
##
## [[2]]
## [1] 5.590362
##
## [[3]]
## [1] 7.912756
lapply(X = d, function(v) which(v > 0))
## [[1]]
## [1] 2 3 4 5 6 7 10 11 12 13 14 16 17 19 20 22 27 28 30
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10 12 14 15 17 18 19 20 21 22 23 24 25 26 28 29
## [26] 30 31 33 36 37 38 39 40 41 43 46 47 49 50
##
## [[3]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 15 17 18 19 22 23
## [20] 24 26 29 30 32 34 35 36 37 41 43 44 45 47 48 50 51 52 53
## [39] 54 56 58 59 61 62 66 68 69 71 72 73 75 76 77 78 79 80 81
## [58] 84 85 86 87 88 89 90 91 92 93 94 97 98 99 100
lapply(X = d, function(v, c) which(v > c), 0)
## [[1]]
## [1] 2 3 4 5 6 7 10 11 12 13 14 16 17 19 20 22 27 28 30
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10 12 14 15 17 18 19 20 21 22 23 24 25 26 28 29
## [26] 30 31 33 36 37 38 39 40 41 43 46 47 49 50
##
## [[3]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 15 17 18 19 22 23
## [20] 24 26 29 30 32 34 35 36 37 41 43 44 45 47 48 50 51 52 53
## [39] 54 56 58 59 61 62 66 68 69 71 72 73 75 76 77 78 79 80 81
## [58] 84 85 86 87 88 89 90 91 92 93 94 97 98 99 100
lapply(X = d, function(v, f) which(v > f(v)), f = mean)
## [[1]]
## [1] 3 4 5 6 7 11 12 13 14 16 17 19 27 28 30
##
## [[2]]
## [1] 1 3 4 5 6 7 14 15 19 21 24 26 28 29 30 31 36 37 39 40 43 46 49
##
## [[3]]
## [1] 1 2 4 6 7 8 10 11 12 13 15 17 18 22 30 32 35 36 37 41 45 47 48 51 52
## [26] 53 56 58 59 61 68 69 71 72 73 77 79 81 84 86 87 89 90 92 94 97 98 99
lapply2 <- function(x, f, ...) {
out <- vector("list", length(x))
for (i in seq_along(x)) {
out[[i]] <- f(x[[i]], ...)
}
out
}
set.seed(seed = 123)
l <- replicate(20, runif(sample(1:10, 1)), simplify = FALSE)
l
## [[1]]
## [1] 0.8830174 0.9404673 0.0455565
##
## [[2]]
## [1] 0.892419 0.551435
##
## [[3]]
## [1] 0.9568333 0.4533342 0.6775706 0.5726334 0.1029247
##
## [[4]]
## [1] 0.3279207 0.9545036 0.8895393 0.6928034 0.6405068
##
## [[5]]
## [1] 0.6557058 0.7085305 0.5440660 0.5941420 0.2891597 0.1471136 0.9630242
## [8] 0.9022990 0.6907053
##
## [[6]]
## [1] 0.02461368 0.47779597 0.75845954 0.21640794
##
## [[7]]
## [1] 0.2316258 0.1428000 0.4145463 0.4137243 0.3688455
##
## [[8]]
## [1] 0.13880606 0.23303410 0.46596245 0.26597264 0.85782772 0.04583117 0.44220007
##
## [[9]]
## [1] 0.1218993 0.5609480 0.2065314 0.1275317 0.7533079 0.8950454 0.3744628
##
## [[10]]
## [1] 0.09484066 0.38396964 0.27438364 0.81464004 0.44851634
##
## [[11]]
## [1] 0.8123895
##
## [[12]]
## [1] 0.7544751586 0.6292211316 0.7101824014 0.0006247733 0.4753165741
## [6] 0.2201188852 0.3798165377 0.6127710033 0.3517979092
##
## [[13]]
## [1] 0.2436195 0.6680556 0.4176468 0.7881958
##
## [[14]]
## [1] 0.4348927 0.9849570 0.8930511 0.8864691 0.1750527 0.1306957
##
## [[15]]
## [1] 0.3435165 0.6567581
##
## [[16]]
## [1] 0.18769112 0.78229430 0.09359499 0.46677904
##
## [[17]]
## [1] 0.5999890 0.3328235 0.4886130
##
## [[18]]
## [1] 0.48290240 0.89035022 0.91443819 0.60873498 0.41068978 0.14709469 0.93529980
## [8] 0.30122890 0.06072057
##
## [[19]]
## [1] 0.1422943 0.5492847 0.9540912 0.5854834 0.4045103 0.6478935 0.3198206
## [8] 0.3077200 0.2197676
##
## [[20]]
## [1] 0.9842192 0.1542023 0.0910440 0.1419069 0.6900071 0.6192565 0.8913941
# With a for loop
out <- vector("list", length(l))
for (i in seq_along(l)) {
out[[i]] <- length(l[[i]])
}
unlist(out)
## [1] 3 2 5 5 9 4 5 7 7 5 1 9 4 6 2 4 3 9 9 7
# With lapply
unlist(lapply(l, length))
## [1] 3 2 5 5 9 4 5 7 7 5 1 9 4 6 2 4 3 9 9 7
set.seed(seed = 123)
d <- list(rnorm(n = 30, mean = 5, sd = 10), rnorm(n = 50, mean = 5, sd = 10), rnorm(n = 100, mean = 8, sd = 15))
lapply(d, function(x) sum(x > 0))
## [[1]]
## [1] 19
##
## [[2]]
## [1] 39
##
## [[3]]
## [1] 72
sapply(d, function(x) sum(x > 0))
## [1] 19 39 72
lapply(d, quantile, probs = 1:3/4)
## [[1]]
## 25% 50% 75%
## -1.713995 4.262667 9.886169
##
## [[2]]
## 25% 50% 75%
## 0.4928728 4.6429139 11.6263363
##
## [[3]]
## 25% 50% 75%
## -1.466591 7.245577 16.280677
sapply(d, quantile, probs = 1:3/4)
## [,1] [,2] [,3]
## 25% -1.713995 0.4928728 -1.466591
## 50% 4.262667 4.6429139 7.245577
## 75% 9.886169 11.6263363 16.280677
lapply(d, function(x) which(x > 0))
## [[1]]
## [1] 2 3 4 5 6 7 10 11 12 13 14 16 17 19 20 22 27 28 30
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10 12 14 15 17 18 19 20 21 22 23 24 25 26 28 29
## [26] 30 31 33 36 37 38 39 40 41 43 46 47 49 50
##
## [[3]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 15 17 18 19 22 23
## [20] 24 26 29 30 32 34 35 36 37 41 43 44 45 47 48 50 51 52 53
## [39] 54 56 58 59 61 62 66 68 69 71 72 73 75 76 77 78 79 80 81
## [58] 84 85 86 87 88 89 90 91 92 93 94 97 98 99 100
sapply(d, function(x) which(x > 0))
## [[1]]
## [1] 2 3 4 5 6 7 10 11 12 13 14 16 17 19 20 22 27 28 30
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10 12 14 15 17 18 19 20 21 22 23 24 25 26 28 29
## [26] 30 31 33 36 37 38 39 40 41 43 46 47 49 50
##
## [[3]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 15 17 18 19 22 23
## [20] 24 26 29 30 32 34 35 36 37 41 43 44 45 47 48 50 51 52 53
## [39] 54 56 58 59 61 62 66 68 69 71 72 73 75 76 77 78 79 80 81
## [58] 84 85 86 87 88 89 90 91 92 93 94 97 98 99 100
compute_mean <- list(
base = function(x) mean(x),
sum = function(x) sum(x) / length(x),
manual = function(x) {
total <- 0
n <- length(x)
for (i in seq_along(x)) {
total <- total + x[i] / n
}
total
}
)
x <- runif(1e5)
system.time(compute_mean$base(x))
## user system elapsed
## 0 0 0
system.time(compute_mean[[2]](x))
## user system elapsed
## 0 0 0
system.time(compute_mean[["manual"]](x))
## user system elapsed
## 0.009 0.001 0.009
lapply(compute_mean, function(f) f(x))
## $base
## [1] 0.4992423
##
## $sum
## [1] 0.4992423
##
## $manual
## [1] 0.4992423
call_fun <- function(f, ...) f(...)
lapply(compute_mean, call_fun, x)
## $base
## [1] 0.4992423
##
## $sum
## [1] 0.4992423
##
## $manual
## [1] 0.4992423
lapply(compute_mean, function(f) system.time(f(x)))
## $base
## user system elapsed
## 0 0 0
##
## $sum
## user system elapsed
## 0.000 0.000 0.001
##
## $manual
## user system elapsed
## 0.005 0.000 0.005
set.seed(seed = 123)
d <- list(rnorm(n = 30, mean = 5, sd = 10), rnorm(n = 50, mean = 5, sd = 10), rnorm(n = 100, mean = 8, sd = 15))
vapply(X = d, max, FUN.VALUE = 0)
## [1] 22.86913 26.68956 56.61560
vapply(X = d, quantile, FUN.VALUE = c(0, 0, 0, 0, 0))
## [,1] [,2] [,3]
## 0% -14.666172 -18.0916888 -22.798708
## 25% -1.713995 0.4928728 -1.466591
## 50% 4.262667 4.6429139 7.245577
## 75% 9.886169 11.6263363 16.280677
## 100% 22.869131 26.6895597 56.615599
vapply(X = d, quantile, FUN.VALUE = c(10, 10, 110, 10, 0))
## [,1] [,2] [,3]
## 0% -14.666172 -18.0916888 -22.798708
## 25% -1.713995 0.4928728 -1.466591
## 50% 4.262667 4.6429139 7.245577
## 75% 9.886169 11.6263363 16.280677
## 100% 22.869131 26.6895597 56.615599
# vapply(X = d, quantile, FUN.VALUE = character(length = 5))
# Error in vapply(X = d, quantile, FUN.VALUE = character(length = 5)) :
# 值的類型必須是 'character',
# 但是 FUN(X[[1]]) 的結果類型是 'double'
# vapply(X = d, quantile, FUN.VALUE = numeric(length = 4))
# Error in vapply(X = d, quantile, FUN.VALUE = numeric(length = 4)) :
# 值的長度必須是 4,
# 但是 FUN(X[[1]]) 的結果長度是 5
set.seed(seed = 123)
xs <- replicate(5, runif(10), simplify = FALSE)
ws <- replicate(5, rpois(10, 5) + 1, simplify = FALSE)
# It’s easy to use lapply() to compute the unweighted means:
unlist(lapply(xs, mean))
## [1] 0.5782475 0.5233693 0.6155837 0.5378580 0.3453964
# But, for weighted means?
weighted.mean(x = xs[[1]], w = ws[[1]])
## [1] 0.5989783
weighted.mean(x = xs[[2]], w = ws[[2]])
## [1] 0.4798772
unlist(lapply(seq_along(xs), function(i) {
weighted.mean(xs[[i]], ws[[i]])
}))
## [1] 0.5989783 0.4798772 0.6175470 0.5619867 0.3535632
#
?mapply
mapply(FUN = weighted.mean, xs, ws)
## [1] 0.5989783 0.4798772 0.6175470 0.5619867 0.3535632
mapply(FUN = weighted.mean, xs, ws, SIMPLIFY = FALSE)
## [[1]]
## [1] 0.5989783
##
## [[2]]
## [1] 0.4798772
##
## [[3]]
## [1] 0.617547
##
## [[4]]
## [1] 0.5619867
##
## [[5]]
## [1] 0.3535632
f <- function(x, y, z) (x + y)^z
mapply(f, x = 1:5, y = seq(0, 3, length.out = 5), MoreArgs = list(z = 0.5), SIMPLIFY = FALSE)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 1.658312
##
## [[3]]
## [1] 2.12132
##
## [[4]]
## [1] 2.5
##
## [[5]]
## [1] 2.828427
Map
## function (f, ...)
## {
## f <- match.fun(f)
## mapply(FUN = f, ..., SIMPLIFY = FALSE)
## }
## <bytecode: 0x7fdc01e07230>
## <environment: namespace:base>
set.seed(seed = 123)
v <- sample(x = month.abb, size = 50, replace = TRUE)
vf <- factor(x = v) # 定義factor
attributes(vf) # 查詢物件所有的屬性
## $levels
## [1] "Apr" "Aug" "Dec" "Feb" "Jan" "Jul" "Jun" "Mar" "May" "Nov" "Oct" "Sep"
##
## $class
## [1] "factor"
sort(vf) # 排序,預設遞增排序
## [1] Apr Apr Aug Aug Dec Dec Dec Dec Feb Feb Jan Jul Jul Jul Jul Jul Jun Jun Jun
## [20] Mar Mar Mar Mar Mar May May May May May May Nov Nov Nov Nov Nov Nov Oct Oct
## [39] Oct Oct Oct Oct Sep Sep Sep Sep Sep Sep Sep Sep
## Levels: Apr Aug Dec Feb Jan Jul Jun Mar May Nov Oct Sep
vf <- factor(x = v, levels = month.abb)
sort(vf)
## [1] Jan Feb Feb Mar Mar Mar Mar Mar Apr Apr May May May May May May Jun Jun Jun
## [20] Jul Jul Jul Jul Jul Aug Aug Sep Sep Sep Sep Sep Sep Sep Sep Oct Oct Oct Oct
## [39] Oct Oct Nov Nov Nov Nov Nov Nov Dec Dec Dec Dec
## Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
levels(vf)
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
nlevels(vf)
## [1] 12
vf
## [1] Mar Mar Oct Feb Jun Nov May Apr Jun Sep Oct Nov May Mar Nov Sep Dec Sep Sep
## [20] Mar Aug Oct Jul Oct Sep Mar Apr Jan Nov Jul May Dec Oct Jul Sep Sep Oct Jul
## [39] Nov Dec May Jul May Nov Jun Sep Feb May Aug Dec
## Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
unclass(vf)
## [1] 3 3 10 2 6 11 5 4 6 9 10 11 5 3 11 9 12 9 9 3 8 10 7 10 9
## [26] 3 4 1 11 7 5 12 10 7 9 9 10 7 11 12 5 7 5 11 6 9 2 5 8 12
## attr(,"levels")
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
typeof(vf)
## [1] "integer"
set.seed(seed = 123)
dim(iris)
## [1] 150 5
sapply(iris, class)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## "numeric" "numeric" "numeric" "numeric" "factor"
tapply(iris$Sepal.Length, INDEX = iris$Species, mean)
## setosa versicolor virginica
## 5.006 5.936 6.588
tapply(X = iris$Sepal.Length, INDEX = list(iris$Species,
iris$Sepal.Width > mean(iris$Sepal.Width)),
FUN = mean)
## FALSE TRUE
## setosa 4.637500 5.076190
## versicolor 5.830952 6.487500
## virginica 6.475758 6.805882
ans <- tapply(X = iris$Sepal.Length, INDEX = list(iris$Species,
iris$Sepal.Width > mean(iris$Sepal.Width)),
FUN = quantile)
ans
## FALSE TRUE
## setosa Numeric,5 Numeric,5
## versicolor Numeric,5 Numeric,5
## virginica Numeric,5 Numeric,5
mat <- matrix(1:15, nrow = 5)
mat
## [,1] [,2] [,3]
## [1,] 1 6 11
## [2,] 2 7 12
## [3,] 3 8 13
## [4,] 4 9 14
## [5,] 5 10 15
apply(X = mat, MARGIN = 2, cumsum)
## [,1] [,2] [,3]
## [1,] 1 6 11
## [2,] 3 13 23
## [3,] 6 21 36
## [4,] 10 30 50
## [5,] 15 40 65
t(apply(X = mat, MARGIN = 1, cumsum))
## [,1] [,2] [,3]
## [1,] 1 7 18
## [2,] 2 9 21
## [3,] 3 11 24
## [4,] 4 13 27
## [5,] 5 15 30
tmp <- tapply(X = mat, INDEX = row(mat), cumsum)
tmp
## $`1`
## [1] 1 7 18
##
## $`2`
## [1] 2 9 21
##
## $`3`
## [1] 3 11 24
##
## $`4`
## [1] 4 13 27
##
## $`5`
## [1] 5 15 30
do.call(rbind, tmp)
## [,1] [,2] [,3]
## 1 1 7 18
## 2 2 9 21
## 3 3 11 24
## 4 4 13 27
## 5 5 15 30
tapply2 <- function(X, INDEX, FUN, ..., simplify = TRUE) {
pieces <- split(X, INDEX)
sapply(pieces, FUN = FUN, simplify = simplify)
}
?split
?Map
Map
## function (f, ...)
## {
## f <- match.fun(f)
## mapply(FUN = f, ..., SIMPLIFY = FALSE)
## }
## <bytecode: 0x7fdc01e07230>
## <environment: namespace:base>
Reduce(`+`, 1:3) # -> ((1 + 2) + 3)
## [1] 6
Reduce(sum, 1:3) # -> sum(sum(1, 2), 3)
## [1] 6
set.seed(seed = 123)
l <- replicate(5, sample(1:10, 15, replace = T), simplify = FALSE)
str(l)
## List of 5
## $ : int [1:15] 3 3 10 2 6 5 4 6 9 10 ...
## $ : int [1:15] 3 8 10 7 10 9 3 4 1 7 ...
## $ : int [1:15] 10 7 5 7 5 6 9 2 5 8 ...
## $ : int [1:15] 5 9 10 4 6 8 6 6 7 1 ...
## $ : int [1:15] 5 6 3 9 4 6 9 9 7 3 ...
如果你想找出所有資料的交集:
intersect(intersect(intersect(intersect(l[[1]], l[[2]]), l[[3]]), l[[4]]), l[[5]])
## [1] 5 9
Reduce(f = intersect, x = l)
## [1] 5 9