lapply()

lapply() dùng để áp dụng lặp lại một hàm cho từng phần tử của một list (hoặc một nhóm các đối tượng) và luôn trả về kết quả dưới dạng list.

# Build a named list whose elements differ in length and distribution:
# a fixed integer sequence plus three normal samples (means 0, 1 and 5).
x <- list(
  a = 1:4,
  b = rnorm(n = 10),
  c = rnorm(n = 20, mean = 1),
  d = rnorm(n = 100, mean = 5)
)
# Auto-print the list so every element is shown.
x
## $a
## [1] 1 2 3 4
## 
## $b
##  [1]  0.44899476 -0.34083084  0.34853326  1.14808473 -0.07618802 -0.29947369
##  [7]  0.40756918 -1.08342935  0.28415112  0.79421043
## 
## $c
##  [1]  0.1831717513 -1.3764995555  1.0328178379  0.6498577211  2.5776086438
##  [6]  1.9290229365  1.2279577308  0.3422590467  0.1465789761  0.6548412349
## [11]  1.9241546366  1.7236659692  0.0004261627  1.8049186650  0.0489684161
## [16]  1.4140868694  2.3744689834  1.0589785459  1.3150997296  0.9783728059
## 
## $d
##   [1] 4.693842 6.118469 4.963846 4.039952 5.428887 3.196418 5.615257 3.896050
##   [9] 5.271312 4.546851 5.841589 4.919419 4.597131 4.447585 4.795010 5.158126
##  [17] 5.204699 4.302329 5.920463 4.604543 4.424102 5.324469 4.693661 4.968281
##  [25] 3.702152 6.247600 4.374264 4.760129 5.841408 6.229371 4.202510 5.727730
##  [33] 5.489549 5.636130 5.718669 4.022112 2.794614 5.314578 5.122858 4.745450
##  [41] 6.911959 5.705176 4.319500 5.580554 4.925161 5.882415 5.041602 6.230771
##  [49] 5.073165 4.899730 6.079619 5.145594 4.728582 2.144772 4.266203 5.134112
##  [57] 5.108570 3.756038 4.550302 3.587848 6.036117 2.791455 6.397301 6.839394
##  [65] 4.915022 5.622709 5.015916 4.441438 3.300368 5.726474 5.858424 5.889120
##  [73] 4.479478 6.140046 5.005719 5.471583 6.063969 4.239001 5.227279 5.976799
##  [81] 4.944041 3.973517 4.615122 4.623892 6.953635 5.970248 4.889914 5.050718
##  [89] 4.106434 5.254330 6.839574 2.279943 4.690487 5.528427 4.365232 5.007320
##  [97] 4.618103 5.675009 5.973018 6.234608
# Apply mean() to every element of x; the result is a list with the same names.
lapply(X = x, FUN = mean)
## $a
## [1] 2.5
## 
## $b
## [1] 0.1631622
## 
## $c
## [1] 1.000538
## 
## $d
## [1] 5.030043

Áp dụng lapply() vào data frame

# Attach the built-in datasets package, which provides `airquality`.
library(datasets)
# Preview the first six rows (the default for head(), spelled out here).
head(airquality, n = 6L)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

Splitting data frame by groups

s <- split(airquality, airquality$Month)
str(s)
## List of 5
##  $ 5:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 41 36 12 18 NA 28 23 19 8 NA ...
##   ..$ Solar.R: int [1:31] 190 118 149 313 NA NA 299 99 19 194 ...
##   ..$ Wind   : num [1:31] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##   ..$ Temp   : int [1:31] 67 72 74 62 56 66 65 59 61 69 ...
##   ..$ Month  : int [1:31] 5 5 5 5 5 5 5 5 5 5 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 6:'data.frame':   30 obs. of  6 variables:
##   ..$ Ozone  : int [1:30] NA NA NA NA NA NA 29 NA 71 39 ...
##   ..$ Solar.R: int [1:30] 286 287 242 186 220 264 127 273 291 323 ...
##   ..$ Wind   : num [1:30] 8.6 9.7 16.1 9.2 8.6 14.3 9.7 6.9 13.8 11.5 ...
##   ..$ Temp   : int [1:30] 78 74 67 84 85 79 82 87 90 87 ...
##   ..$ Month  : int [1:30] 6 6 6 6 6 6 6 6 6 6 ...
##   ..$ Day    : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 7:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 135 49 32 NA 64 40 77 97 97 85 ...
##   ..$ Solar.R: int [1:31] 269 248 236 101 175 314 276 267 272 175 ...
##   ..$ Wind   : num [1:31] 4.1 9.2 9.2 10.9 4.6 10.9 5.1 6.3 5.7 7.4 ...
##   ..$ Temp   : int [1:31] 84 85 81 84 83 83 88 92 92 89 ...
##   ..$ Month  : int [1:31] 7 7 7 7 7 7 7 7 7 7 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 8:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 39 9 16 78 35 66 122 89 110 NA ...
##   ..$ Solar.R: int [1:31] 83 24 77 NA NA NA 255 229 207 222 ...
##   ..$ Wind   : num [1:31] 6.9 13.8 7.4 6.9 7.4 4.6 4 10.3 8 8.6 ...
##   ..$ Temp   : int [1:31] 81 81 82 86 85 87 89 90 90 92 ...
##   ..$ Month  : int [1:31] 8 8 8 8 8 8 8 8 8 8 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 9:'data.frame':   30 obs. of  6 variables:
##   ..$ Ozone  : int [1:30] 96 78 73 91 47 32 20 23 21 24 ...
##   ..$ Solar.R: int [1:30] 167 197 183 189 95 92 252 220 230 259 ...
##   ..$ Wind   : num [1:30] 6.9 5.1 2.8 4.6 7.4 15.5 10.9 10.3 10.9 9.7 ...
##   ..$ Temp   : int [1:30] 91 92 93 93 87 84 80 78 75 73 ...
##   ..$ Month  : int [1:30] 9 9 9 9 9 9 9 9 9 9 ...
##   ..$ Day    : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...

Calculating multiple column means by groups

Để tính mean của các column trong data frame theo từng tháng (airquality$Month), áp dụng colMeans() lên từng data frame con trong list s:

# For each per-month data frame in s, compute the mean of the three
# measurement columns. No NA handling here, so any column containing
# a missing value yields NA.
lapply(s, function(month_df) {
  measure_cols <- c("Ozone", "Solar.R", "Wind")
  colMeans(month_df[, measure_cols])
})
## $`5`
##    Ozone  Solar.R     Wind 
##       NA       NA 11.62258 
## 
## $`6`
##     Ozone   Solar.R      Wind 
##        NA 190.16667  10.26667 
## 
## $`7`
##      Ozone    Solar.R       Wind 
##         NA 216.483871   8.941935 
## 
## $`8`
##    Ozone  Solar.R     Wind 
##       NA       NA 8.793548 
## 
## $`9`
##    Ozone  Solar.R     Wind 
##       NA 167.4333  10.1800

sapply() có chức năng tương tự lapply(), tuy nhiên nó cố gắng đơn giản hoá kết quả (ở đây là một ma trận thay vì list) nên cho bảng kết quả tinh gọn hơn lapply().

# Same per-month column means as with lapply(), but sapply() simplifies
# the equal-length results into a matrix (rows = columns, cols = months).
sapply(
  s,
  function(month_df) {
    selected <- month_df[, c("Ozone", "Solar.R", "Wind")]
    colMeans(selected)
  }
)
##                5         6          7        8        9
## Ozone         NA        NA         NA       NA       NA
## Solar.R       NA 190.16667 216.483871       NA 167.4333
## Wind    11.62258  10.26667   8.941935 8.793548  10.1800

Removing NA

# Per-month column means with missing values dropped before averaging.
# Uses the reserved constant TRUE rather than `T`: `T` is an ordinary
# variable that can be reassigned (e.g. T <- 0), which would silently
# turn NA removal off.
sapply(s, function(x) {
  colMeans(x[, c("Ozone", "Solar.R", "Wind")], na.rm = TRUE)
})
##                 5         6          7          8         9
## Ozone    23.61538  29.44444  59.115385  59.961538  31.44828
## Solar.R 181.29630 190.16667 216.483871 171.857143 167.43333
## Wind     11.62258  10.26667   8.941935   8.793548  10.18000