# Build two parallel character vectors to combine
names.first <- c("Juan Gabriel", "Zach", "Jack", "Sansa")
names.last <- c("Gomila", "Effron", "Sparrow", "Stark")
# paste() is vectorized: it joins the two vectors element by element
# (separated by a space), as if we had written a for loop ourselves
paste(names.first, names.last)
## [1] "Juan Gabriel Gomila" "Zach Effron" "Jack Sparrow"
## [4] "Sansa Stark"
Supongamos que uno de los dos vectores es más largo:
single.surname <- c("Zuccherberg")
# paste() also combines vectors of different lengths: the shorter one
# (here a single surname) is recycled against every first name
paste(names.first, single.surname)
## [1] "Juan Gabriel Zuccherberg" "Zach Zuccherberg"
## [3] "Jack Zuccherberg" "Sansa Zuccherberg"
Una función que toma dos vectores (nombres y apellidos) y los combina para generar nombres de usuario:
# Build lowercase usernames: the surname followed by the first two
# characters of the first name. Works element-wise on parallel vectors.
#   first: character vector of first names
#   last:  character vector of surnames
# Returns a lowercase character vector.
username <- function(first, last) {
  initials <- substr(first, start = 1, stop = 2)
  handle <- paste0(last, initials)
  tolower(handle)
}
username(names.first, names.last)
## [1] "gomilaju" "effronza" "sparrowja" "starksa"
apply es una función para aplicar una operación a un conjunto de filas o columnas de una matriz:
# 4 x 4 matrix holding the integers 1 to 16, filled column by column
m <- matrix(1:16, nrow = 4, ncol = 4)
m
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
Operaciones con apply:
# MARGIN = 1       -> function applied to each row
# MARGIN = 2       -> function applied to each column
# MARGIN = c(1,2)  -> function applied to each individual element
apply(m, 1, min ) # minimum of each row
## [1] 1 2 3 4
apply(m, 2, max) # maximum of each column
## [1] 4 8 12 16
apply(m, 1, prod) # product of each row
## [1] 585 1680 3465 6144
apply(m, 2, sum) # sum of each column
## [1] 10 26 42 58
apply(m, c(1,2), function(x){x^2}) # square every entry (applied over both rows and columns)
## [,1] [,2] [,3] [,4]
## [1,] 1 25 81 169
## [2,] 4 36 100 196
## [3,] 9 49 121 225
## [4,] 16 64 144 256
apply(m, 1, quantile, probs = c(0.4, 0.6, 0.8)) # 40%, 60% and 80% quantiles of each row; each column of the result corresponds to one row of m
## [,1] [,2] [,3] [,4]
## 40% 5.8 6.8 7.8 8.8
## 60% 8.2 9.2 10.2 11.2
## 80% 10.6 11.6 12.6 13.6
# column sums and row sums
colSums(m)
## [1] 10 26 42 58
rowSums(m)
## [1] 28 32 36 40
# column means and row means
colMeans(m)
## [1] 2.5 6.5 10.5 14.5
rowMeans(m)
## [1] 7 8 9 10
Aplicando “apply” a un array. Primero creamos el array 3D:
# 3D array: two 4 x 4 slices holding 1..16 and 17..32
array3D <- array(1:32, dim = c(4L, 4L, 2L))
array3D
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 17 21 25 29
## [2,] 18 22 26 30
## [3,] 19 23 27 31
## [4,] 20 24 28 32
Aplicamos las operaciones:
apply(array3D, 3, sum) # MARGIN = 3: sum all the entries of each slice along the third dimension
## [1] 136 392
sum(1:16) # check: matches the total of the first slice
## [1] 136
apply(array3D, 3, mean) # mean of each slice
## [1] 8.5 24.5
apply(array3D, c(1,2), sum) # for each (row, column) position, sum across the slices
## [,1] [,2] [,3] [,4]
## [1,] 18 26 34 42
## [2,] 20 28 36 44
## [3,] 22 30 38 46
## [4,] 24 32 40 48
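Hay dos funciones mejores para trabajar con vectores, listas y dataframes:
lapply -> l = lista; se puede aplicar a vectores, listas y data frames, y siempre devuelve una lista. sapply -> s = simplificar; devuelve un vector o una matriz cuando es posible y, si no, una lista.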
# Load the auto-mpg dataset, keeping text columns as character vectors.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
auto <- read.csv("../DataSets/auto-mpg.csv", stringsAsFactors = FALSE)
Creamos un vector sencillo para probar las funciones antes de aplicarlas al dataframe:
x <- c(1,2,3)
x
## [1] 1 2 3
# apply the square root to every element, starting with lapply
lapply(x, sqrt) # returns a list
## [[1]]
## [1] 1
##
## [[2]]
## [1] 1.414214
##
## [[3]]
## [1] 1.732051
class(lapply(x, sqrt)) # confirms that the result is a list
## [1] "list"
# Named list of three numeric vectors with different lengths (10, 4 and 10 elements)
x <- list(a = 1:10, b = c(1,10,100,1000), c = seq(5,50, by = 5))
x
## $a
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $b
## [1] 1 10 100 1000
##
## $c
## [1] 5 10 15 20 25 30 35 40 45 50
lapply(x, mean)
## $a
## [1] 5.5
##
## $b
## [1] 277.75
##
## $c
## [1] 27.5
class(lapply(x,mean))
## [1] "list"
Veamos cómo se comporta sapply():
sapply(x, sqrt) # returns a list here, not a vector: the three results have different lengths, so they cannot be simplified
## $a
## [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
## [8] 2.828427 3.000000 3.162278
##
## $b
## [1] 1.000000 3.162278 10.000000 31.622777
##
## $c
## [1] 2.236068 3.162278 3.872983 4.472136 5.000000 5.477226 5.916080
## [8] 6.324555 6.708204 7.071068
class(sapply(x, sqrt))
## [1] "list"
sapply(x, mean)
## a b c
## 5.50 277.75 27.50
class(sapply(x, mean))
## [1] "numeric"
Aplicando las funciones sobre el dataset:
lapply(auto[,2:8], min)
## $mpg
## [1] 9
##
## $cylinders
## [1] 3
##
## $displacement
## [1] 68
##
## $horsepower
## [1] 46
##
## $weight
## [1] 1613
##
## $acceleration
## [1] 8
##
## $model_year
## [1] 70
sapply(auto[,2:8], min)
## mpg cylinders displacement horsepower weight
## 9 3 68 46 1613
## acceleration model_year
## 8 70
Con la función summary, lapply devuelve una lista con un resumen por columna, mientras que sapply simplifica esos resúmenes en una matriz:
lapply(auto[,2:8], summary)
## $mpg
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 17.50 23.00 23.51 29.00 46.60
##
## $cylinders
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 5.455 8.000 8.000
##
## $displacement
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 68.0 104.2 148.5 193.4 262.0 455.0
##
## $horsepower
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.0 76.0 92.0 104.1 125.0 230.0
##
## $weight
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1613 2224 2804 2970 3608 5140
##
## $acceleration
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.00 13.82 15.50 15.57 17.18 24.80
##
## $model_year
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 70.00 73.00 76.00 76.01 79.00 82.00
sapply(auto[,2:8], summary)
## mpg cylinders displacement horsepower weight acceleration
## Min. 9.00000 3.000000 68.0000 46.0000 1613.000 8.00000
## 1st Qu. 17.50000 4.000000 104.2500 76.0000 2223.750 13.82500
## Median 23.00000 4.000000 148.5000 92.0000 2803.500 15.50000
## Mean 23.51457 5.454774 193.4259 104.1281 2970.425 15.56809
## 3rd Qu. 29.00000 8.000000 262.0000 125.0000 3608.000 17.17500
## Max. 46.60000 8.000000 455.0000 230.0000 5140.000 24.80000
## model_year
## Min. 70.00000
## 1st Qu. 73.00000
## Median 76.00000
## Mean 76.01005
## 3rd Qu. 79.00000
## Max. 82.00000
sapply(auto[,2:8], range) # range returns the minimum and the maximum of each column
## mpg cylinders displacement horsepower weight acceleration model_year
## [1,] 9.0 3 68 46 1613 8.0 70
## [2,] 46.6 8 455 230 5140 24.8 82
sapply(auto[,2:8], min)
## mpg cylinders displacement horsepower weight
## 9 3 68 46 1613
## acceleration model_year
## 8 70
# This does not give the column minimum: auto[,2] is a plain vector, so
# sapply calls min on each element separately and returns every value unchanged.
sapply(auto[,2], min)
## [1] 28.0 19.0 36.0 28.0 21.0 23.0 15.5 32.9 16.0 13.0 12.0 30.7 13.0 27.9
## [15] 13.0 23.8 29.0 14.0 14.0 29.0 20.5 26.6 20.0 20.0 26.4 16.0 40.8 15.0
## [29] 18.0 35.0 26.5 13.0 25.8 39.1 25.0 14.0 19.4 30.0 32.0 26.0 20.6 17.5
## [43] 18.0 14.0 27.0 25.1 14.0 19.1 17.0 23.5 21.5 19.0 22.0 19.4 20.0 32.0
## [57] 30.9 29.0 14.0 14.0 38.0 24.0 14.0 14.0 16.5 31.0 19.9 12.0 16.0 17.0
## [71] 33.5 15.0 19.0 31.3 18.0 13.0 20.5 21.0 14.0 15.5 32.0 33.8 36.1 22.0
## [85] 21.0 17.6 40.9 15.5 26.0 24.0 15.0 13.0 36.0 37.2 43.4 25.0 31.5 15.0
## [99] 27.0 11.0 26.0 26.0 26.0 12.0 24.5 26.0 34.5 15.0 27.5 18.0 32.3 18.0
## [113] 10.0 23.6 22.4 37.0 21.5 33.0 27.0 25.0 23.0 14.0 26.6 18.0 14.0 14.0
## [127] 34.0 29.5 16.9 21.0 26.0 33.0 16.0 15.0 10.0 28.0 20.2 29.8 14.0 11.0
## [141] 15.0 38.0 13.0 41.5 19.2 34.1 21.0 36.0 18.0 44.0 37.7 16.0 31.0 25.5
## [155] 34.3 26.0 16.0 13.0 27.2 23.0 24.2 30.0 14.0 13.0 30.0 17.5 17.5 29.0
## [169] 22.0 15.0 36.4 15.0 36.0 21.0 15.5 36.1 17.5 22.0 26.0 18.0 25.0 23.0
## [183] 15.0 22.0 23.0 12.0 15.0 21.6 22.0 9.0 28.0 22.0 14.5 20.0 19.0 22.3
## [197] 24.0 12.0 18.0 29.5 19.0 25.4 24.0 26.0 23.0 13.0 18.0 27.2 18.0 24.0
## [211] 29.0 26.0 13.0 31.0 20.6 33.5 27.4 30.0 18.0 24.0 25.0 12.0 20.0 13.0
## [225] 26.8 15.0 26.0 31.0 34.7 30.5 25.0 21.0 18.1 32.4 15.0 24.0 28.0 18.6
## [239] 19.8 13.0 19.0 25.0 23.0 27.0 25.0 35.7 20.0 16.5 34.0 20.0 23.7 18.2
## [253] 11.0 16.0 24.5 33.7 37.3 16.2 31.0 27.2 32.0 31.6 43.1 30.0 32.4 27.0
## [267] 17.0 29.0 46.6 20.5 34.2 32.0 18.5 17.7 25.0 20.2 17.0 28.0 28.0 28.0
## [281] 18.0 17.5 13.0 15.0 15.0 27.0 20.2 24.0 23.0 25.4 11.0 15.0 27.0 20.0
## [295] 29.9 36.0 23.2 37.0 24.0 38.0 35.0 29.8 26.0 27.0 17.6 13.0 24.0 22.0
## [309] 26.0 18.5 16.0 19.0 31.0 28.0 18.0 20.3 34.1 31.0 36.0 15.5 13.0 27.0
## [323] 19.0 18.0 18.0 32.2 31.8 17.0 33.5 25.5 30.0 33.0 21.0 19.0 44.3 22.0
## [337] 19.0 24.0 28.1 14.0 19.2 32.0 17.0 18.5 13.0 28.4 19.2 29.0 31.5 23.9
## [351] 13.0 34.4 28.8 23.0 38.0 20.0 16.0 16.0 25.0 30.0 32.8 20.2 18.1 32.7
## [365] 21.5 22.0 24.3 14.0 37.0 25.0 28.0 31.9 16.0 19.0 39.4 29.0 17.0 13.0
## [379] 23.0 22.5 20.8 16.0 39.0 21.1 18.0 35.1 14.0 44.6 16.0 14.0 32.1 23.9
## [393] 13.0 16.5 34.5 38.1 30.5 19.0
# Wrapping the column in a data frame makes sapply treat it as a single
# column, so min is computed over the whole column. simplify = FALSE keeps
# the result as a list. FALSE is spelled out because the shorthand F is an
# ordinary variable that can be reassigned.
sapply(as.data.frame(auto[, 2]), min, simplify = FALSE)
## $`auto[, 2]`
## [1] 9
# tapply: recode cylinders as a labelled factor, then compute the mean mpg
# within each cylinder group
auto$cylinders <- factor(
  auto$cylinders,
  levels = c(3, 4, 5, 6, 8),
  labels = c("3C", "4C", "5C", "6C", "8C")
)
tapply(X = auto$mpg, INDEX = auto$cylinders, FUN = mean)
## 3C 4C 5C 6C 8C
## 20.55000 29.28676 27.36667 19.98571 14.96311
tapply(auto$mpg, list(cyl = auto$cylinders), mean)
## cyl
## 3C 4C 5C 6C 8C
## 20.55000 29.28676 27.36667 19.98571 14.96311
# by() splits auto by cylinder group and calls the function once per group.
# Despite its name, the `row` parameter receives the subset data frame for
# that group (not a single row); the function returns the correlation
# between mpg and acceleration within the group.
by(auto, auto$cylinders,
function(row){ cor(row$mpg, row$acceleration)} )
## auto$cylinders: 3C
## [1] -0.8188727
## --------------------------------------------------------
## auto$cylinders: 4C
## [1] 0.07488465
## --------------------------------------------------------
## auto$cylinders: 5C
## [1] 0.714897
## --------------------------------------------------------
## auto$cylinders: 6C
## [1] -0.339605
## --------------------------------------------------------
## auto$cylinders: 8C
## [1] 0.3212278