FAMILIA DE LOS APPLY

INTRODUCCIÓN

En el presente artículo realizaremos una serie de ejercicios sobre la familia de funciones “APPLY”, elaborados por estudiantes de la carrera de Estadística de la Facultad de Ciencias Económicas de la Universidad Central del Ecuador. Estos programas nos serán de ayuda para nuestro aprendizaje.

APPLY

EN UN DATA FRAME

# Build a small all-numeric data frame (4 rows, 3 columns) used by the
# apply() examples below.
df<- data.frame(x= 1:4, y= 5:8, z= 10:13)
df
##   x y  z
## 1 1 5 10
## 2 2 6 11
## 3 3 7 12
## 4 4 8 13
# apply() with every argument named: MARGIN = 1 applies FUN to each row.
apply(X= df, MARGIN= 1, FUN= sum)
## [1] 16 19 22 25
# The same call with positional arguments.
apply(df, 1, sum)
## [1] 16 19 22 25
# Row sums restricted to the first two rows.
apply(df[c(1, 2), ], 1, sum)
##  1  2 
## 16 19
# MARGIN = 2 applies the function to each column instead.
apply(df, 2, sum)
##  x  y  z 
## 10 26 46
# Column sums for columns 1 and 3 only.
apply(df[, c(1, 3)], 2, sum)
##  x  z 
## 10 46
# MARGIN = c(1, 2) applies the function to every cell; the sum of a single
# value is the value itself, so the data are returned unchanged as a matrix.
apply(df, c(1, 2), sum)
##      x y  z
## [1,] 1 5 10
## [2,] 2 6 11
## [3,] 3 7 12
## [4,] 4 8 13
# With the margins reversed to c(2, 1) the result is laid out transposed.
apply(df, c(2, 1), sum)
##   [,1] [,2] [,3] [,4]
## x    1    2    3    4
## y    5    6    7    8
## z   10   11   12   13
# Arguments given after the function (here na.rm = TRUE) are passed on to it.
apply(df, 1, mean, na.rm = TRUE)
## [1] 5.333333 6.333333 7.333333 8.333333

EN FUNCIONES

# Square the values of `x`.
#
# Arguments:
#   x          Numeric value or vector to square.
#   character  Logical flag. When TRUE the squares are returned as character
#              strings; when FALSE (the default) they are returned as numbers.
#
# Returns `x^2`, either numeric or converted with as.character().
fun <- function(x, character = FALSE) {
  # Test the flag directly rather than comparing it with `== FALSE`.
  if (character) {
    as.character(x^2)
  } else {
    x^2
  }
}

# Square the values row by row. apply() stores the result of each row as a
# column, so the output is transposed with respect to df.
apply(df, 1, fun)
##   [,1] [,2] [,3] [,4]
## x    1    4    9   16
## y   25   36   49   64
## z  100  121  144  169
# Arguments after the function are forwarded to fun(); with character = TRUE
# the squares come back as strings.
apply(df, 1, fun, character = TRUE)
##      [,1]  [,2]  [,3]  [,4] 
## [1,] "1"   "4"   "9"   "16" 
## [2,] "25"  "36"  "49"  "64" 
## [3,] "100" "121" "144" "169"
# Column by column: the result keeps the layout of df.
apply(df, 2, fun)
##       x  y   z
## [1,]  1 25 100
## [2,]  4 36 121
## [3,]  9 49 144
## [4,] 16 64 169
# Cell by cell: same values as the column-wise call.
apply(df, c(1, 2), fun)
##       x  y   z
## [1,]  1 25 100
## [2,]  4 36 121
## [3,]  9 49 144
## [4,] 16 64 169

FUNCIÓN EXPONENCIAL

# Sum of the exponentials of the values in `x`.
f <- function(x) {
  exponenciales <- exp(x)
  sum(exponenciales)
}
# Sum of exponentials across each row.
apply(df, 1, f)
## [1]  22177.60  60284.96 163871.51 445448.95
# Sum of exponentials down each column.
apply(df, 2, f)
##            x            y            z 
##     84.79102   4629.43310 687068.79094
# 1:2 is the same as c(1, 2): f() receives one cell at a time, so the result
# is simply exp() of every value.
apply(df, 1:2, f)
##              x         y         z
## [1,]  2.718282  148.4132  22026.47
## [2,]  7.389056  403.4288  59874.14
## [3,] 20.085537 1096.6332 162754.79
## [4,] 54.598150 2980.9580 442413.39
# Minimum of each column.
apply(df, 2, min)
##  x  y  z 
##  1  5 10
# range() returns two values (minimum and maximum), so the result has one
# row for each of them.
apply(df, 2, range)
##      x y  z
## [1,] 1 5 10
## [2,] 4 8 13
# summary() returns six statistics; applied by row, each original row becomes
# a column of the result.
apply(df, 1, summary)
##              [,1]      [,2]      [,3]      [,4]
## Min.     1.000000  2.000000  3.000000  4.000000
## 1st Qu.  3.000000  4.000000  5.000000  6.000000
## Median   5.000000  6.000000  7.000000  8.000000
## Mean     5.333333  6.333333  7.333333  8.333333
## 3rd Qu.  7.500000  8.500000  9.500000 10.500000
## Max.    10.000000 11.000000 12.000000 13.000000
# The same statistics computed per column.
apply(df, 2, summary)
##            x    y     z
## Min.    1.00 5.00 10.00
## 1st Qu. 1.75 5.75 10.75
## Median  2.50 6.50 11.50
## Mean    2.50 6.50 11.50
## 3rd Qu. 3.25 7.25 12.25
## Max.    4.00 8.00 13.00
# apply() also works on higher-dimensional arrays: a 3 x 2 x 3 array filled
# with the values 1 to 18.
ar<- array(data = 1:18, dim = c(3, 2, 3))
# MARGIN = 3 sums each 3 x 2 slice along the third dimension.
apply(ar, 3, sum)
## [1] 21 57 93

TAPPLY

EN UN DATA FRAME

# Fix the random seed so the simulated data below are reproducible.
set.seed(2)
# Simulate 25 sales: a rounded normal price (mean 30, sd 10), a product type
# coded 1 to 4 and a store label ("Tienda 1" to "Tienda 4").
data_set<- data.frame(precio= round(rnorm(25, sd= 10, mean= 30)),
                       tipo= sample(1:4, size= 25, replace= TRUE),
                       tienda= sample(paste("Tienda", 1:4),
                                       size= 25, replace = TRUE))

head(data_set)
##   precio tipo   tienda
## 1     21    2 Tienda 2
## 2     32    3 Tienda 3
## 3     46    4 Tienda 4
## 4     19    3 Tienda 4
## 5     29    1 Tienda 4
## 6     31    3 Tienda 4
# Copy the columns into standalone vectors. These copies are taken before
# the NA values are introduced into data_set further down.
precio<- data_set$precio
tienda<- data_set$tienda
# Turn the numeric type codes into a factor with descriptive labels
# (1 = juguetes, 2 = comida, 3 = electrónica, 4 = bebidas).
tipo<- factor(data_set$tipo,
               labels = c("juguetes", "comida", "electrónica", "bebidas"))

# Mean price per product type; the result is a named one-dimensional array.
precios_medios<- tapply(precio, tipo, mean)
precios_medios
##    juguetes      comida electrónica     bebidas 
##    39.50000    30.33333    32.20000    29.33333
class(precios_medios)
## [1] "array"
# Elements can be extracted by position.
precios_medios[2]
##   comida 
## 30.33333
# simplify = FALSE returns the per-group results as a list instead.
lista_precios_medios<- tapply(precio, tipo, mean, simplify= FALSE)
lista_precios_medios
## $juguetes
## [1] 39.5
## 
## $comida
## [1] 30.33333
## 
## $electrónica
## [1] 32.2
## 
## $bebidas
## [1] 29.33333
# List elements can be accessed by group name.
lista_precios_medios$juguetes
## [1] 39.5
# Introduce missing values: one in precio (row 1, column 1) and one in
# tienda (row 2, column 3).
data_set[1, 1] <- NA
data_set[2, 3] <- NA
# A missing price makes the mean of its store NA ...
tapply(data_set$precio, data_set$tienda, mean)
## Tienda 1 Tienda 2 Tienda 3 Tienda 4 
## 32.00000       NA 39.25000 33.14286
# ... unless na.rm = TRUE is forwarded to mean().
tapply(data_set$precio, data_set$tienda, mean, na.rm= TRUE)
## Tienda 1 Tienda 2 Tienda 3 Tienda 4 
## 32.00000 33.50000 39.25000 33.14286

COMO FUNCIÓN

# Mean of `x` that ignores missing values.
f <- function(x) {
  mean(x, na.rm = TRUE)
}
# Using the wrapper f() gives the same result as passing na.rm = TRUE
# directly to mean().
tapply(data_set$precio, data_set$tienda, f)
## Tienda 1 Tienda 2 Tienda 3 Tienda 4 
## 32.00000 33.50000 39.25000 33.14286
# Two grouping factors (passed as a list) produce a type-by-store table.
# The standalone vectors are used here; combinations with no observations
# appear as NA.
tapply(precio, list(tipo, tienda), mean)
##             Tienda 1 Tienda 2 Tienda 3 Tienda 4
## juguetes          46 31.00000       49 36.66667
## comida            26 30.33333       39       NA
## electrónica       50 29.00000       32 25.00000
## bebidas           22 40.00000       20 36.00000
# default = 0 fills the empty combinations with 0 instead of NA.
tapply(precio, list(tipo, tienda), mean, default= 0)
##             Tienda 1 Tienda 2 Tienda 3 Tienda 4
## juguetes          46 31.00000       49 36.66667
## comida            26 30.33333       39  0.00000
## electrónica       50 29.00000       32 25.00000
## bebidas           22 40.00000       20 36.00000

SAPPLY

COMO FUNCIÓN

# Apply sqrt() to each element of 1:4; sapply() simplifies the results into
# a numeric vector.
sapply(1:4, sqrt)
## [1] 1.000000 1.414214 1.732051 2.000000
# Equivalent call written with an anonymous function.
sapply(1:4, function(i) sqrt(i))
## [1] 1.000000 1.414214 1.732051 2.000000
# Named wrapper that returns the square root of `i`.
mi_fun <- function(i) sqrt(i)
# Equivalent call using the named function mi_fun().
sapply(1:4, mi_fun)
## [1] 1.000000 1.414214 1.732051 2.000000
# Preallocate a numeric vector of length 10 (filled with zeros).
# NOTE(review): `out` is not used in the visible part of this document;
# confirm whether an accompanying loop example is missing.
out<- numeric(10)

SAPPLY vs LAPPLY

COMPARACIÓN

# sapply() simplifies the results into a numeric vector ...
sapply(c(3, 5, 7), exp)
## [1]   20.08554  148.41316 1096.63316
# ... whereas lapply() returns them as a list.
lapply(c(3, 5, 7), exp)
## [[1]]
## [1] 20.08554
## 
## [[2]]
## [1] 148.4132
## 
## [[3]]
## [1] 1096.633
# With simplify = FALSE, sapply() returns the same list as lapply().
sapply(c(3, 5, 7), exp, simplify= FALSE)
## [[1]]
## [1] 20.08554
## 
## [[2]]
## [1] 148.4132
## 
## [[3]]
## [1] 1096.633

EN LISTA

# Convert the simplified sapply() vector back into a list.
as.list(sapply(c(3, 5, 7), exp))
## [[1]]
## [1] 20.08554
## 
## [[2]]
## [1] 148.4132
## 
## [[3]]
## [1] 1096.633
# Conversely, simplify2array() collapses the lapply() list into a vector.
simplify2array(lapply(c(3, 5, 7), exp))
## [1]   20.08554  148.41316 1096.63316

EN MATRIZ

# A 3 x 3 matrix filled with 1.
matrix(1, ncol = 3, nrow = 3)
##      [,1] [,2] [,3]
## [1,]    1    1    1
## [2,]    1    1    1
## [3,]    1    1    1
# By default sapply() flattens each 3 x 3 matrix into a column of length 9,
# giving a 9 x 3 matrix.
sapply(1:3, function(i) matrix(i, ncol= 3, nrow= 3))
##       [,1] [,2] [,3]
##  [1,]    1    2    3
##  [2,]    1    2    3
##  [3,]    1    2    3
##  [4,]    1    2    3
##  [5,]    1    2    3
##  [6,]    1    2    3
##  [7,]    1    2    3
##  [8,]    1    2    3
##  [9,]    1    2    3
# simplify = "array" keeps each matrix intact and stacks them into a
# 3 x 3 x 3 array.
sapply(1:3, function(i) matrix(i, ncol= 3, nrow= 3), simplify= "array")
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    1    1
## [2,]    1    1    1
## [3,]    1    1    1
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    2    2    2
## [2,]    2    2    2
## [3,]    2    2    2
## 
## , , 3
## 
##      [,1] [,2] [,3]
## [1,]    3    3    3
## [2,]    3    3    3
## [3,]    3    3    3
# simplify = FALSE returns the matrices as a list.
sapply(1:3, function(i) matrix(i, ncol= 3, nrow= 3), simplify= FALSE)
## [[1]]
##      [,1] [,2] [,3]
## [1,]    1    1    1
## [2,]    1    1    1
## [3,]    1    1    1
## 
## [[2]]
##      [,1] [,2] [,3]
## [1,]    2    2    2
## [2,]    2    2    2
## [3,]    2    2    2
## 
## [[3]]
##      [,1] [,2] [,3]
## [1,]    3    3    3
## [2,]    3    3    3
## [3,]    3    3    3

COMO FUNCIÓN

# Double every value of the built-in `trees` data set in two ways: with
# nested for loops and with nested sapply() calls. Note that `df` is rebound
# to `trees` here.
df <- trees
res <- data.frame()

# Loop version: fill `res` cell by cell, column by column.
# seq_len() is used instead of 1:n so that the loops are skipped, rather than
# iterating over c(1, 0), when the data frame has no rows or no columns.
for (i in seq_len(ncol(df))) {
  for (j in seq_len(nrow(df))) {
    res[j, i] <- df[j, i] * 2
  }
}

# Functional version: the inner sapply() builds one doubled column and the
# outer sapply() binds the columns into a numeric matrix.
sapply(seq_len(ncol(df)), function(i) {
  sapply(seq_len(nrow(df)), function(j) {
    df[j, i] * 2
  })
})
##       [,1] [,2]  [,3]
##  [1,] 16.6  140  20.6
##  [2,] 17.2  130  20.6
##  [3,] 17.6  126  20.4
##  [4,] 21.0  144  32.8
##  [5,] 21.4  162  37.6
##  [6,] 21.6  166  39.4
##  [7,] 22.0  132  31.2
##  [8,] 22.0  150  36.4
##  [9,] 22.2  160  45.2
## [10,] 22.4  150  39.8
## [11,] 22.6  158  48.4
## [12,] 22.8  152  42.0
## [13,] 22.8  152  42.8
## [14,] 23.4  138  42.6
## [15,] 24.0  150  38.2
## [16,] 25.8  148  44.4
## [17,] 25.8  170  67.6
## [18,] 26.6  172  54.8
## [19,] 27.4  142  51.4
## [20,] 27.6  128  49.8
## [21,] 28.0  156  69.0
## [22,] 28.4  160  63.4
## [23,] 29.0  148  72.6
## [24,] 32.0  144  76.6
## [25,] 32.6  154  85.2
## [26,] 34.6  162 110.8
## [27,] 35.0  164 111.4
## [28,] 35.8  160 116.6
## [29,] 36.0  160 103.0
## [30,] 36.0  160 102.0
## [31,] 41.2  174 154.0

GRÁFICO

# Scatter plot of 10 standard-normal draws with the y axis fixed to [-6, 6].
plot(rnorm(10), ylim= c(-6, 6))

# NOTE(review): presumably the number of lines to add to the plot; its use
# is not visible in this part of the file, so confirm against what follows.
nlines<- 5