intermediate - 3 - apply family
# lapply(X, FUN, ...)
# takes a vector or list X
# applies the function FUN to each of its members
# If "FUN" requires additional arguments,
# you pass them after you've specified "X" and "FUN (...)"
# output = list
# same length as X,
# where each element = result of
# applying FUN
# on the corresponding element of X.
# The vector pioneers has already been created for you
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
# Split names from birth year
split_math <- strsplit(pioneers, split = ":")
# Convert to lowercase strings: split_low
split_low <- lapply(split_math, tolower)
# Take a look at the structure of split_low
str(split_low)
## List of 4
## $ : chr [1:2] "gauss" "1777"
## $ : chr [1:2] "bayes" "1702"
## $ : chr [1:2] "pascal" "1623"
## $ : chr [1:2] "pearson" "1857"
# Write function select_first()
select_first <- function(x) {
x[1]
}
# Apply select_first() over split_low: names
names <- lapply(split_low, select_first)
# Write function select_second()
select_second <- function(x) {
x[2]
}
# Apply select_second() over split_low: years
years <- lapply(split_low, select_second)
names
## [[1]]
## [1] "gauss"
##
## [[2]]
## [1] "bayes"
##
## [[3]]
## [1] "pascal"
##
## [[4]]
## [1] "pearson"
years
## [[1]]
## [1] "1777"
##
## [[2]]
## [1] "1702"
##
## [[3]]
## [1] "1623"
##
## [[4]]
## [1] "1857"
# defining functions to use them only once = overkill
# lapply and anonymous functions
# Named function
triple <- function(x) { 3 * x }
# Anonymous function with same implementation
function(x) { 3 * x }
## function(x) { 3 * x }
# Use anonymous function inside lapply()
lapply(list(1,2,3), function(x) { 3 * x })
## [[1]]
## [1] 3
##
## [[2]]
## [1] 6
##
## [[3]]
## [1] 9
# Transform: use anonymous function inside lapply
names <- lapply(split_low, function(k) {k[1]})
# Transform: use anonymous function inside lapply
years <- lapply(split_low, function(k) {k[2]})
names
## [[1]]
## [1] "gauss"
##
## [[2]]
## [1] "bayes"
##
## [[3]]
## [1] "pascal"
##
## [[4]]
## [1] "pearson"
years
## [[1]]
## [1] "1777"
##
## [[2]]
## [1] "1702"
##
## [[3]]
## [1] "1623"
##
## [[4]]
## [1] "1857"
# lapply with additional arguments
multiply <- function(x, factor) {
x * factor
}
lapply(list(1,2,3), multiply, factor = 3)
## [[1]]
## [1] 3
##
## [[2]]
## [1] 6
##
## [[3]]
## [1] 9
# Generic select function
select_el <- function(x, index) {
x[index]
}
# Use lapply() twice on split_low: names and years
names <- lapply(split_low,select_el,1)
years <- lapply(split_low,select_el,2)
names
## [[1]]
## [1] "gauss"
##
## [[2]]
## [1] "bayes"
##
## [[3]]
## [1] "pascal"
##
## [[4]]
## [1] "pearson"
years
## [[1]]
## [1] "1777"
##
## [[2]]
## [1] "1702"
##
## [[3]]
## [1] "1623"
##
## [[4]]
## [1] "1857"
# sapply(X, FUN, ...)
# simplifies the list that lapply() would return
# by turning it into a vector
temp <- list(c(3, 7, 9, 6, -1), c(6, 9, 12, 13, 5), c(4, 8, 3, -1, -3), c(1, 4, 7, 2, -2),
c(5, 7, 9, 4, 2), c(-3, 5, 8, 9, 4), c(3, 6, 9, 4, 1))
# Use lapply() to find each day's minimum temperature
lapply(temp, min)
## [[1]]
## [1] -1
##
## [[2]]
## [1] 5
##
## [[3]]
## [1] -3
##
## [[4]]
## [1] -2
##
## [[5]]
## [1] 2
##
## [[6]]
## [1] -3
##
## [[7]]
## [1] 1
# Use sapply() to find each day's minimum temperature
sapply(temp, min)
## [1] -1 5 -3 -2 2 -3 1
# Use lapply() to find each day's maximum temperature
lapply(temp, max)
## [[1]]
## [1] 9
##
## [[2]]
## [1] 13
##
## [[3]]
## [1] 8
##
## [[4]]
## [1] 7
##
## [[5]]
## [1] 9
##
## [[6]]
## [1] 9
##
## [[7]]
## [1] 9
# Use sapply() to find each day's maximum temperature
sapply(temp, max)
## [1] 9 13 8 7 9 9 9
# self-written function
extremes_avg <- function(x) {
( min(x) + max(x) ) / 2
}
# Apply extremes_avg() over temp using sapply()
sapply(temp, extremes_avg)
## [1] 4.0 9.0 2.5 2.5 5.5 3.0 5.0
# Apply extremes_avg() over temp using lapply()
lapply(temp, extremes_avg)
## [[1]]
## [1] 4
##
## [[2]]
## [1] 9
##
## [[3]]
## [1] 2.5
##
## [[4]]
## [1] 2.5
##
## [[5]]
## [1] 5.5
##
## [[6]]
## [1] 3
##
## [[7]]
## [1] 5
# what if the function
# you're applying over a list or a vector
# returns a vector of length greater than 1?
# function that returns min and max of a vector: extremes
extremes <- function(x) {
c(min = min(x), max = max(x))
}
# Apply extremes() over temp with sapply()
sapply(temp, extremes)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## min -1 5 -3 -2 2 -3 1
## max 9 13 8 7 9 9 9
# Apply extremes() over temp with lapply()
lapply(temp, extremes)
## [[1]]
## min max
## -1 9
##
## [[2]]
## min max
## 5 13
##
## [[3]]
## min max
## -3 8
##
## [[4]]
## min max
## -2 7
##
## [[5]]
## min max
## 2 9
##
## [[6]]
## min max
## -3 9
##
## [[7]]
## min max
## 1 9
# sapply can't simplify, now what?
# Definition of below_zero()
below_zero <- function(x) {
return(x[x < 0])
}
# Apply below_zero over temp using sapply(): freezing_s
freezing_s <- sapply(temp, below_zero)
freezing_s
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
##
## [[4]]
## [1] -2
##
## [[5]]
## numeric(0)
##
## [[6]]
## [1] -3
##
## [[7]]
## numeric(0)
# Apply below_zero over temp using lapply(): freezing_l
freezing_l <- lapply(temp, below_zero)
freezing_l
## [[1]]
## [1] -1
##
## [[2]]
## numeric(0)
##
## [[3]]
## [1] -1 -3
##
## [[4]]
## [1] -2
##
## [[5]]
## numeric(0)
##
## [[6]]
## [1] -3
##
## [[7]]
## numeric(0)
# Are freezing_s and freezing_l identical?
# freezing_s == freezing_l
identical(freezing_s,freezing_l)
## [1] TRUE
# sapply with functions that return NULL
# Definition of print_info()
print_info <- function(x) {
cat("The average temperature is", mean(x), "\n")
}
# Apply print_info() over temp using sapply()
sapply(temp, print_info)
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
# Apply print_info() over temp using lapply()
lapply(temp, print_info)
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
identical(sapply(temp, print_info),lapply(temp, print_info))
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## [1] TRUE
# 'vector-version' of a list of NULL's
# would simply be a NULL,
# which is no longer a vector
# with the same length as the input.
# vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
# FUN.VALUE argument expects a template
# for the return argument of FUN
# USE.NAMES => generate a named array (if possible)
# i.e. restrict the output of the function
# i.e. more robust version of sapply()
# Definition of basics()
# takes a vector, and returns a named vector of length 3
basics <- function(x) {
c(min = min(x), mean = mean(x), max = max(x))
}
# Apply basics() over temp using vapply()
### vapply(temp, basics, numeric(3))
# structure of the output of FUN
# does not correspond to the template you specify in FUN.VALUE
# vapply() will throw an error
# misalignment between expected and actual output
basics <- function(x) {
c(min = min(x), mean = mean(x), median = median(x), max = max(x))
}
# error, because FUN(X[[1]]) result is length 4
### vapply(temp, basics, numeric(3)) # => values must be length 3
# fixed
vapply(temp, basics, numeric(4))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## min -1.0 5 -3.0 -2.0 2.0 -3.0 1.0
## mean 4.8 9 2.2 2.4 5.4 4.6 4.6
## median 6.0 9 3.0 2.0 5.0 5.0 4.0
## max 9.0 13 8.0 7.0 9.0 9.0 9.0
# add robustness
# Convert from sapply to vapply() expression
sapply(temp, max)
## [1] 9 13 8 7 9 9 9
vapply(temp, max, numeric(1))
## [1] 9 13 8 7 9 9 9
# Convert from sapply to vapply() expression
sapply(temp, function(x, y) { mean(x) > y }, y = 5)
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE
vapply(temp, function(x, y) { mean(x) > y }, y = 5, logical(1))
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE