library(purrr)
# Piecewise demo of if / else if / else on a single number x:
#   x > 10      -> error "x is too big"
#   5 < x <= 10 -> x doubled
#   x <= 5      -> (x - 1) doubled five times, i.e. (x - 1) * 2^5
myFn = function(x){
  # Guard clause: anything above 10 is rejected outright.
  if (x > 10) stop("x is too big")
  # Early exit for the middle band.
  if (x > 5) return(x * 2)
  # Five successive doublings are the same as one multiplication by 2^5.
  (x - 1) * 2^5
}
# x = 2 falls through to the else branch: (2 - 1) doubled five times = 32
myFn(2)
## [1] 32
# x = 5 is not > 5, so it also takes the else branch: (5 - 1) doubled five times = 128
myFn(5)
## [1] 128
# Scoping demo: f reads the global v, but assigning to v inside f only creates
# a local variable, so the global v is left untouched.
v = 8
f = function(x){
  doubled = 2 * x
  v = v - 1 # local v, computed from the global value
  doubled + v
}
f(3) # local v (8 - 1 = 7) plus y (2 * 3 = 6)
## [1] 13
v # the global v is unchanged by the assignment inside f
## [1] 8
# Default-argument demo: adds x and y, where y is 3 unless the caller supplies it.
f = function(x, y = 3) x + y
f(4) # y falls back to its default: 4 + 3 = 7
## [1] 7
f(4, 5) # y supplied explicitly: 4 + 5 = 9
## [1] 9
# Summarise a vector with a caller-supplied function (functions as arguments).
#
# values: the vector to summarise.
# fn:     the summary function, or its name as a string; defaults to mean.
# ...:    extra arguments forwarded to fn, e.g. na.rm = TRUE.
# Returns whatever fn(values, ...) returns.
avg = function(values, fn = mean, ...){
  # match.fun() accepts a function or a function name and fails early with a
  # clear error for anything else.
  fn = match.fun(fn)
  fn(values, ...)
}
# the function itself (not its result) is passed as the second argument
avg( c(1,4,5,7), mean)
## [1] 4.25
# same wrapper, different summary function
avg( c(1,2,2,5,4,2), median)
## [1] 2
# Function factory: returns a function that rolls a die with `sides` faces.
#
# sides: number of faces on the die.
# Returns a function(rolls) that draws `rolls` values from 1..sides with
# replacement.
dice = function(sides){
  # Evaluate `sides` now. Without this, lazy evaluation would defer it until
  # the first roll, picking up any later change to the caller's variable.
  force(sides)
  function(rolls){
    # sample.int() draws from 1..sides directly. Unlike 1:sides it does not
    # silently sample from c(1, 0) when sides is 0; it raises an error.
    sample.int(sides, rolls, replace = TRUE)
  }
}
# dice(5) returns a function; store it and call it with the number of rolls
five_sided = dice(5)
# the output is random, so the values will differ from run to run
five_sided(6)
## [1] 2 1 3 5 1 2
# the returned function can also be called immediately: 20 rolls of a 12-sided die
dice(12)(20)
## [1] 10 11 9 2 7 1 5 1 5 5 12 7 11 12 7 12 3 11 5 7
# Sample input vector, reused by later examples.
vs = c(1, 2, 5, 8, 4)
# Linear transform 3x + 2; the arithmetic is vectorised, so x may be a whole vector.
f = function(x){
  tripled = 3 * x
  tripled + 2
}
f(vs) # pass the whole vector as the argument; f is applied to every element at once
## [1] 5 8 17 26 14
# Iterate as you would with a for loop, but use map() instead.
# input vector 1..5
values = c(1:5)
# call f once per element of values and collect the results
results = map(values, f) # map( VECTOR, function )
results
## [[1]]
## [1] 5
##
## [[2]]
## [1] 8
##
## [[3]]
## [1] 11
##
## [[4]]
## [1] 14
##
## [[5]]
## [1] 17
# map() returns a list: [[i]] is the position of the element in the input
# vector, and the value printed beneath it is f applied to that element
# You can use these variants to return an atomic vector instead of a list:
# - map_lgl() for logicals
# - map_int() for integers
# - map_dbl() for doubles
# - map_chr() for characters
map_dbl(values, f)
## [1] 5 8 11 14 17
# If a function is used only once, for the map call, you can pass in an
# anonymous function(x) directly.
# watch the brackets: the } that closes the anonymous function's body comes
# before the ) that closes the map_dbl() call
map_dbl(values, function(x){
3 * x + 2
})
## [1] 5 8 11 14 17
# You can write a formula instead of the anonymous function(x).
# tilde shorthand: ~ 3 * .x + 2, where .x stands for the current element
map_dbl(values, ~ 3 * .x + 2)
## [1] 5 8 11 14 17
# Two-argument function: triples x, then adds the offset y.
f2 = function(x, y) 3 * x + y
# arguments given after the function (here y) are passed on to f2 for every element
map_dbl(values, f2, y= 2)
## [1] 5 8 11 14 17
# same call over vs with a different fixed y
map_dbl(vs, f2, y=3)
## [1] 6 9 18 27 15
# Example using the iris dataset (first 3 rows shown).
# iris is a data frame
class(iris)
## [1] "data.frame"
# peek at the first 3 rows
head(iris, 3)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
# mapping over a data frame iterates over its columns: class() is called once
# per column and the result is named after the columns
map_chr(iris, class)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## "numeric" "numeric" "numeric" "numeric" "factor"
# Map over the columns with unique(), then count the distinct values in each with map_dbl().
# Number of distinct values per column: the inner map() reduces each column to
# its unique values, the outer map_dbl() measures the length of each result.
map_dbl(map(iris, unique), length)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# Say you have a folder of .csv files. In this example every file in the folder
# is named like "ae_attendances/2016-04-01.csv", differing only in the date.
# A regex matches the date part: groups of 4, 2 and 2 digits separated by '-'.
# Collect the paths of all date-named CSV files (e.g. "2016-04-01.csv").
# NOTE(review): the folder name follows the "ae_attendances/..." example in the
# accompanying text; the previous value " " was a blank placeholder that
# matched nothing. Adjust it if the data lives elsewhere.
files = dir(
  path = "ae_attendances",
  pattern = "^\\d{4}-\\d{2}-\\d{2}\\.csv$",
  full.names = TRUE # keep the folder prefix so the paths can be read directly
)
# map() method: read each file with read_csv(), giving a list with one data
# frame per file. read_csv() is namespace-qualified because readr is not
# attached in this script. col_types "ccddd" declares two character columns
# followed by three double columns.
map(files, readr::read_csv, col_types = "ccddd")
# Alternatively, use map_dfr(files, read_csv, col_types = "ccddd") to combine
# the files row-wise into a single data frame.
# Read every file into one data frame and record which file each row came from.
# readr, dplyr and stringr functions are namespace-qualified because only purrr
# is attached in this script.
files %>%
  set_names() %>% # name each path after itself so .id can store it
  map_dfr(readr::read_csv, col_types = "ccddd", .id = "filename") %>%
  dplyr::mutate(
    # Pull the yyyy-mm-dd part that sits right before ".csv" and parse it as a
    # date. A regular escaped string is used: the previous r"\d..." form was not
    # a valid raw string (raw strings need r"(...)" delimiters).
    period = stringr::str_extract(filename, "\\d{4}-\\d{2}-\\d{2}(?=\\.csv$)") %>%
      lubridate::ymd()
  )
# With map2(), the two vectors need to be of the same length.
# Define the two input vectors up front and pass them to map2_dbl(), which
# walks both in parallel: .x is the element from v1, .y the element from v2.
v1 = 1:4
v2 = 6:9
map2_dbl(v1, v2, ~ .x * .y)
## [1] 6 14 24 36
# For more than two inputs, use pmap().
# pmap_dbl() steps through the three vectors in parallel; because the list is
# unnamed, element i of each vector is passed by position as x, y and z.
pmap_dbl(list(1:3, 4:6, 7:9), function(x, y, z) x * y + z)
## [1] 11 18 27
# With a named list, the names (keys) are matched to the function's argument
# names, so the order of the arguments does not matter.
# The list elements are named a, b and c, so they are matched to the function's
# arguments by name even though the function declares them in the order c, a, b.
pmap_dbl(list(a = 1:3, b = 4:6, c = 7:9), function(c, a, b) a * b + c)
## [1] 11 18 27