• Aggregating functions, such as the mean or the sum, which return a single number (a scalar);
  • Transforming or subsetting functions; and
  • Vectorized functions, which yield more complex structures like lists, vectors, matrices, and arrays.

  • The apply family is made up of the apply(), lapply(), sapply(), vapply(), mapply(), rapply(), and tapply() functions.

##              sr pop15 pop75     dpi ddpi
## Australia 11.43 29.35  2.87 2329.68 2.87
## Austria   12.07 23.32  4.41 1507.99 3.93
## Belgium   13.17 23.80  4.43 2108.47 3.82
## Bolivia    5.75 41.89  1.67  189.13 0.22
## Brazil    12.88 42.19  0.83  728.47 4.56
## Canada     8.79 31.72  2.85 2982.88 2.43
## 'data.frame':    50 obs. of  5 variables:
##  $ sr   : num  11.43 12.07 13.17 5.75 12.88 ...
##  $ pop15: num  29.4 23.3 23.8 41.9 42.2 ...
##  $ pop75: num  2.87 4.41 4.43 1.67 0.83 2.85 1.34 0.67 1.06 1.14 ...
##  $ dpi  : num  2330 1508 2108 189 728 ...
##  $ ddpi : num  2.87 3.93 3.82 0.22 4.56 2.43 2.67 6.51 3.08 2.8 ...
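| Function | Arguments             | Input                      | Output              |
|----------|-----------------------|----------------------------|---------------------|
| apply    | apply(X, MARGIN, FUN) | Data frame or matrix       | Vector, list, array |
| lapply   | lapply(X, FUN)        | List, vector or data frame | List                |
| sapply   | sapply(X, FUN)        | List, vector or data frame | Vector or matrix    |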

apply()

  • Returns a vector or array or list of values obtained by applying a function to margins of an array or matrix.

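apply(X, MARGIN, FUN) — X: an array or matrix (a data frame is coerced to a matrix); MARGIN: 1 for rows, 2 for columns, c(1, 2) for both; FUN: the R function to apply, either built in (e.g. sum, mean, max, min, median, var) or user-defined.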

# Preview the first rows of `data` (head() shows six rows by default)
head(data)
##              sr pop15 pop75     dpi ddpi
## Australia 11.43 29.35  2.87 2329.68 2.87
## Austria   12.07 23.32  4.41 1507.99 3.93
## Belgium   13.17 23.80  4.43 2108.47 3.82
## Bolivia    5.75 41.89  1.67  189.13 0.22
## Brazil    12.88 42.19  0.83  728.47 4.56
## Canada     8.79 31.72  2.85 2982.88 2.43
# MARGIN = 2 applies FUN over columns: one mean per column
apply(data, MARGIN = 2, FUN = mean)
##        sr     pop15     pop75       dpi      ddpi 
##    9.6710   35.0896    2.2930 1106.7584    3.7576
# MARGIN = 1 applies FUN over rows: one mean per row
apply(data, MARGIN = 1, FUN = mean)
##      Australia        Austria        Belgium        Bolivia         Brazil 
##        475.240        310.344        430.738         47.732        157.786 
##         Canada          Chile          China       Colombia     Costa Rica 
##        605.734        141.442         70.670         66.482        106.720 
##        Denmark        Ecuador        Finland         France        Germany 
##        509.144         68.210        345.404        452.148        499.954 
##         Greece      Guatamala       Honduras        Iceland          India 
##        183.304         68.224         58.246        387.920         28.350 
##        Ireland          Italy          Japan          Korea     Luxembourg 
##        237.926        287.164        263.102         52.024        497.368 
##          Malta         Norway    Netherlands    New Zealand      Nicaragua 
##        131.932        454.904        358.194        307.146         76.314 
##         Panama       Paraguay           Peru    Philippines       Portugal 
##        124.274         53.168         91.780         42.834        126.258 
##   South Africa South Rhodesia          Spain         Sweden    Switzerland 
##        139.732         59.940        163.104        667.068        535.002 
##         Turkey        Tunisia United Kingdom  United States      Venezuela 
##         88.450         60.228        370.296        809.028        174.088 
##         Zambia        Jamaica        Uruguay          Libya       Malaysia 
##         41.568         88.254        161.702         38.988         60.068
# User-defined function (UDF): MARGIN = 1:2 applies it to every single cell,
# here halving each value; only the first 5 rows are shown
head(apply(data, MARGIN = 1:2, FUN = function(x) x / 2), n = 5)
##              sr  pop15 pop75      dpi  ddpi
## Australia 5.715 14.675 1.435 1164.840 1.435
## Austria   6.035 11.660 2.205  753.995 1.965
## Belgium   6.585 11.900 2.215 1054.235 1.910
## Bolivia   2.875 20.945 0.835   94.565 0.110
## Brazil    6.440 21.095 0.415  364.235 2.280

lapply()

  • lapply returns a list of the same length as X, each element of which is the result of applying FUN to the corresponding element of X.
  • Useful for running the same comparison or aggregation across several data frames.
library(knitr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(kableExtra)
# Collect three built-in data frames into a single list
A <- cars
B <- women
C <- rock
dataset_list <- list(A, B, C)
# Extract the 2nd column from every data frame in the list
lapply(dataset_list, function(df) df[, 2])
## [[1]]
##  [1]   2  10   4  22  16  10  18  26  34  17  28  14  20  24  28  26  34
## [18]  34  46  26  36  60  80  20  26  54  32  40  32  40  50  42  56  76
## [35]  84  36  46  68  32  48  52  56  64  66  54  70  92  93 120  85
## 
## [[2]]
##  [1] 115 117 120 123 126 129 132 135 139 142 146 150 154 159 164
## 
## [[3]]
##  [1] 2791.900 3892.600 3930.660 3869.320 3948.540 4010.150 4345.750
##  [8] 4344.750 3682.040 3098.650 4480.050 3986.240 4036.540 3518.040
## [15] 3999.370 3629.070 4608.660 4787.620 4864.220 4479.410 3428.740
## [22] 4353.140 4697.650 3518.440 1977.390 1379.350 1916.240 1585.420
## [29] 1851.210 1239.660 1728.140 1461.060 1426.760  990.388 1350.760
## [36] 1461.060 1376.700  476.322 1189.460 1644.960  941.543  308.642
## [43] 1145.690 2280.490 1174.110  597.808 1455.880 1485.580
# Extract the 1st row from every data frame and render the result as a
# striped, hover-highlighted table
dataset_list %>%
  lapply(function(df) df[1, ]) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
speed dist
4 2
height weight
58 115
area peri shape perm
4990 2791.9 0.0903296 6.3
# Extract the single value at row 1, column 2 of every data frame
lapply(dataset_list, function(df) df[1, 2])
## [[1]]
## [1] 2
## 
## [[2]]
## [1] 115
## 
## [[3]]
## [1] 2791.9
# Pull the 2nd column out of every data frame, then take the mean of each
second <- lapply(dataset_list, function(df) df[, 2])
avg <- lapply(second, FUN = mean)
avg
## [[1]]
## [1] 42.98
## 
## [[2]]
## [1] 136.7333
## 
## [[3]]
## [1] 2682.212
class(avg) # "list": lapply() always returns a list
## [1] "list"

sapply()

  • sapply is a user-friendly wrapper around lapply that, by default, returns a vector or matrix when appropriate.
# version 1: sapply() with a built-in function
# Rebuild the list from the 1st column of each data frame
dataset_list <- list(A[, 1], B[, 1], C[, 1])
avg <- sapply(dataset_list, FUN = mean)
avg
## [1]   15.400   65.000 7187.729
class(avg) # "numeric": sapply() simplified the result to a vector
## [1] "numeric"
# version 2: sapply() with a user-defined function
# avg() returns the midpoint between the smallest and largest value of x
# (the midrange), not the arithmetic mean.
avg <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (lowest + highest) / 2
}
# Apply the user-defined avg() to every element of the list
avg2 <- sapply(X = dataset_list, FUN = avg)
avg2
## [1]   14.5   65.0 6614.0

tapply()

  • Apply a function to each cell of a ragged array, that is to each (non-empty) group of values given by a unique combination of the levels of certain factors.
# Mean of sr by group. The grouping factor here is the row names, which are
# unique, so every group holds a single observation and the "mean" is just
# that row's sr value (returned in alphabetical order of the group labels).
tapply(X = data$sr, INDEX = rownames(data), FUN = mean)
##      Australia        Austria        Belgium        Bolivia         Brazil 
##          11.43          12.07          13.17           5.75          12.88 
##         Canada          Chile          China       Colombia     Costa Rica 
##           8.79           0.60          11.90           4.98          10.78 
##        Denmark        Ecuador        Finland         France        Germany 
##          16.85           3.59          11.24          12.64          12.55 
##         Greece      Guatamala       Honduras        Iceland          India 
##          10.67           3.01           7.70           1.27           9.00 
##        Ireland          Italy        Jamaica          Japan          Korea 
##          11.34          14.28           7.72          21.10           3.98 
##          Libya     Luxembourg       Malaysia          Malta    Netherlands 
##           8.89          10.35           4.71          15.48          14.65 
##    New Zealand      Nicaragua         Norway         Panama       Paraguay 
##          10.67           7.30          10.25           4.44           2.02 
##           Peru    Philippines       Portugal   South Africa South Rhodesia 
##          12.70          12.78          12.49          11.14          13.30 
##          Spain         Sweden    Switzerland        Tunisia         Turkey 
##          11.77           6.86          14.13           2.81           5.13 
## United Kingdom  United States        Uruguay      Venezuela         Zambia 
##           7.81           7.56           9.24           9.22          18.56