Functional Programming Basics

library(purrr)

# myFn: toy example of branching and looping on a single number x.
#   x > 10       -> stops with an error
#   5 < x <= 10  -> returns x doubled
#   x <= 5       -> returns (x - 1) doubled five times over
myFn = function(x){
  if (x > 10){
    stop("x is too big")
  }
  if (x > 5){
    return (x*2)
  }
  # remaining case: start from x - 1 and double it five times
  result = x - 1
  for (step in seq_len(5)){
    result = 2 * result
  }
  result
}

myFn(2)  # x <= 5 branch: (2 - 1) doubled five times
## [1] 32
myFn(5)  # x <= 5 branch: (5 - 1) doubled five times
## [1] 128

scope of variables

# global v: read by f() below, never modified by it
v = 8

# f: returns 2 * x plus (v - 1).
# The assignment to v inside the body creates a new local v that shadows the
# global one, so the global v keeps its value after the call.
f = function(x){
  v = v - 1
  doubled = x * 2
  doubled + v
}

f(3) # y + v = 6 + 7 (y is 2 * 3, the local v is 8 - 1)
## [1] 13
v    # the global v is unchanged
## [1] 8

functions with default arguments

# f: adds x and y; y is optional and falls back to 3 when it is omitted.
f = function(x, y = 3) x + y

f(4)    # y omitted, so the default is used: 4 + 3 = 7
## [1] 7
f(4, 5) # y supplied: 4 + 5 = 9
## [1] 9

functions as variables

# avg: applies the function `fn` to `values` and returns whatever fn returns.
# `fn` is passed in like any other argument, e.g. mean or median.
avg = function(values, fn) fn(values)

avg( c(1,4,5,7), mean)        # same as mean(c(1,4,5,7))
## [1] 4.25
avg( c(1,2,2,5,4,2), median)  # same as median(c(1,2,2,5,4,2))
## [1] 2

nested functions

# dice: function factory that builds a die roller.
#   sides - number of faces on the die; a single number >= 1
# Returns a function(rolls) that draws `rolls` values from 1..sides with
# replacement. The returned function keeps using the `sides` given here.
# Stops with an error when `sides` is not a single number >= 1.
dice = function(sides){
  # Validate up front: 1:sides counts DOWN when sides is 0 (giving c(1, 0)),
  # which would silently roll a die with faces 1 and 0.
  # The check also evaluates `sides` before the inner function is returned.
  stopifnot(is.numeric(sides), length(sides) == 1, sides >= 1)
  function(rolls){
    sample(seq_len(sides), rolls, replace=TRUE)
  }
}


five_sided = dice(5)  # a roller that keeps sides = 5

five_sided(6)         # six rolls; values are random, so they differ between runs
## [1] 2 1 3 5 1 2
dice(12)(20)          # build a 12-sided die and roll it 20 times in one go
##  [1] 10 11  9  2  7  1  5  1  5  5 12  7 11 12  7 12  3 11  5  7

iteration with vectors

# example input vector
vs = c(1, 2, 5, 8, 4)

# f: computes 3 * x + 2. The arithmetic is vectorised, so x may be a whole
# vector and the result then has one value per element.
f = function(x) 3 * x + 2

f(vs) # pass the whole vector in as the argument; each element becomes 3 * x + 2
## [1]  5  8 17 26 14

map

Iterate as you would with a for loop, but use map() instead.

values = c(1:5)
results = map(values, f) # map( VECTOR, function ): calls f once per element
results
## [[1]]
## [1] 5
## 
## [[2]]
## [1] 8
## 
## [[3]]
## [1] 11
## 
## [[4]]
## [1] 14
## 
## [[5]]
## [1] 17
# map() returns a list: each [[i]] is the position of the input element,
# and the value printed under it is f applied to that element

purrr map() functions

You can use these variations to return an atomic vector instead of a list:

  • map_lgl() for logicals
  • map_int() for integers
  • map_dbl() for doubles
  • map_chr() for characters
map_dbl(values, f)  # same calls as map(values, f), returned as a double vector
## [1]  5  8 11 14 17

map anonymous function

If a function is only used once, in the map call itself, you can pass an anonymous function(x) straight in.

# watch the brackets: the anonymous function's closing } comes before
# the ) that closes map_dbl()
map_dbl(values, function(x){
  3 * x + 2
})
## [1]  5  8 11 14 17

map a formula

you can type a formula instead of the anonymous function(x)

# ~ (tilde) starts the formula; .x stands for the current element
map_dbl(values, ~ 3 * .x + 2)
## [1]  5  8 11 14 17

map functions with 2 arguments

# f2: two-argument function computing 3 * x + y
f2 = function(x, y) 3 * x + y

map_dbl(values, f2, y= 2)  # the extra argument y = 2 is passed on to f2 in every call
## [1]  5  8 11 14 17
map_dbl(vs, f2, y=3)       # same idea with the vs vector and y = 3
## [1]  6  9 18 27 15

map with dataframes

Example of iris dataset, 3 rows

class(iris)    # confirm that iris is a data frame
## [1] "data.frame"
head(iris, 3)  # show the first 3 rows
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

map over the columns of a dataframe

# a data frame is iterated column by column, so class() runs once per column
# and the result is a character vector named after the columns
map_chr(iris, class)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##    "numeric"    "numeric"    "numeric"    "numeric"     "factor"

Map over the columns with unique, then count the unique values with length (returned as doubles)

# step 1: map(unique) keeps the distinct values of each column (as a list)
# step 2: map_dbl(length) counts them, giving one number per column
iris %>% 
  map(unique) %>% 
  map_dbl(length)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

map over multiple dataframes

Say you have a folder of .csv files. For this example, every file in the folder is named like “ae_attendances/2016-04-01.csv”, with only the date differing. A regex is used to match the date in the file name: 4 digits, then 2 digits, then 2 digits, separated by ‘-’.

#  grab all csv into files
# List the dated CSV files in the ae_attendances folder. The pattern only
# matches file names of the form YYYY-MM-DD.csv, and full.names = TRUE keeps
# the folder prefix so that each returned path can be read directly.
files = dir(path = "ae_attendances",
            pattern = "^\\d{4}-\\d{2}-\\d{2}\\.csv$",
            full.names = TRUE
            )

#  map() method using the read_csv()
# Reads every file with the same column types and returns a list with one
# data frame per file. read_csv is called through its namespace (readr::)
# so the call does not depend on readr having been attached with library().
map(files, readr::read_csv, col_types= "ccddd")

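Alternatively, use map_dfr(files, read_csv, col_types="ccddd") to read the files and row-bind them into a single data frame. The version below also records which file each row came from.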

# Read all files into ONE data frame and record which file each row came from.
#   set_names()       names each path after itself, so .id has something to use
#   .id = "filename"  stores that name in a `filename` column
#   period            the YYYY-MM-DD part of the file name, parsed as a date
# The regex is an ordinary string with doubled backslashes (same style as the
# pattern passed to dir()); (?=\\.csv$) only matches a date that is
# immediately followed by the .csv extension at the end of the path.
# readr, dplyr and stringr functions are called through their namespaces, like
# lubridate::ymd(), so the pipeline does not depend on them being attached.
files %>%
  set_names() %>%
  map_dfr(readr::read_csv, col_types = "ccddd", .id = "filename") %>%
  dplyr::mutate(
    period = filename %>%
      stringr::str_extract("\\d{4}-\\d{2}-\\d{2}(?=\\.csv$)") %>%
      lubridate::ymd()
  )

map over 2 vectors

The two vectors need to be the same length.

# option to have vectors outside
v1 = 1:4
v2 = 6:9

# map2_dbl() walks both vectors in step: .x comes from v1 and .y from v2,
# so the results are 1 * 6, 2 * 7, 3 * 8, 4 * 9
map2_dbl(v1, v2, ~.x * .y)
## [1]  6 14 24 36

use the pmap()

# pmap takes a list of vectors; with an unnamed list they are matched to the
# function's arguments by position (x = 1:3, y = 4:6, z = 7:9)
list(1:3, 4:6, 7:9) %>% 
  pmap_dbl( function(x,y,z) x * y +z)
## [1] 11 18 27

With a named list, the names (keys) are matched to the function's argument names, so the order of the arguments does not matter.

# with a named list the elements are matched to the arguments by name, so the
# argument order in function(c,a,b) does not matter: a * b + c is still
# 1 * 4 + 7, 2 * 5 + 8, 3 * 6 + 9
list(a= 1:3, b= 4:6, c= 7:9) %>% 
  pmap_dbl( function(c,a,b) a * b + c)
## [1] 11 18 27