Functional Programming

Functional Programming Basics
scope of variables
functions with default arguments
functions as variables
nested functions
iteration with vectors
map
map functions with 2 arguments
map with dataframes
- map a dataframe column
- map over multiple dataframes
map over 2 vectors

Functional Programming Basics

library(purrr)

# Demonstrates branching and looping inside a function.
#   x > 10       : signals the error "x is too big"
#   5 < x <= 10  : returns x doubled
#   x <= 5       : returns (x - 1) doubled five times
myFn <- function(x) {
  # Guard clauses handle the two "large x" cases up front.
  if (x > 10) {
    stop("x is too big")
  }
  if (x > 5) {
    return(x * 2)
  }

  # Remaining case: start from x - 1 and double it five times.
  doubled <- x - 1
  remaining <- 5
  while (remaining > 0) {
    doubled <- doubled * 2
    remaining <- remaining - 1
  }
  doubled
}

myFn(2)

## [1] 32

myFn(5)

## [1] 128

scope of variables

# Global variable that f() reads but never modifies.
v <- 8

# Returns 2 * x + (v - 1). The assignment to `v` inside the function creates
# a local variable, so the global `v` keeps its value after the call.
f <- function(x) {
  v <- v - 1 # right-hand side reads the global v; result is stored locally
  2 * x + v
}

f(3) # = 7 + 6

## [1] 13

v    # unchanged

## [1] 8

functions with default arguments

# Adds `y` to `x`. `y` defaults to 3 when the caller does not supply it.
f <- function(x, y = 3) {
  x + y
}

f(4)    # = 7

## [1] 7

f(4, 5) # = 9

## [1] 9

functions as variables

# Applies the function `fn` to `values` and returns whatever `fn` returns.
# Shows that a function can be passed around like any other argument.
avg <- function(values, fn) {
  fn(values)
}

avg( c(1,4,5,7), mean)

## [1] 4.25

avg( c(1,2,2,5,4,2), median)

## [1] 2

nested functions

# Function factory: returns a function that rolls a die with `sides` faces.
#
# sides: a single number >= 1, the number of faces on the die.
# Returns a function(rolls) that draws `rolls` values from 1..sides with
# replacement.
dice <- function(sides) {
  # Arguments are evaluated lazily. Without force(), the returned closure
  # would only read `sides` on its first call and could pick up a value the
  # caller's variable was changed to in the meantime.
  force(sides)
  if (!is.numeric(sides) || length(sides) != 1L || is.na(sides) || sides < 1) {
    stop("`sides` must be a single number >= 1", call. = FALSE)
  }

  function(rolls) {
    # seq_len() avoids the 1:0 == c(1, 0) pitfall of the `1:n` idiom.
    sample(seq_len(sides), rolls, replace = TRUE)
  }
}


five_sided = dice(5)

five_sided(6)

## [1] 2 1 3 5 1 2

dice(12)(20)

##  [1] 10 11  9  2  7  1  5  1  5  5 12  7 11 12  7 12  3 11  5  7

iteration with vectors

vs = c(1,2,5,8,4)

# Linear transform 3 * x + 2. The arithmetic is vectorised, so passing a
# whole vector returns one result per element.
f <- function(x) {
  3 * x + 2
}

f(vs) # pass the vector into the function as argument

## [1]  5  8 17 26 14

map

Iterate over a vector as you would with a for loop, but using map(), which calls the function once per element.

# Input vector 1..5; map() calls f() once per element and collects the
# results in a list.
values <- 1:5
results <- map(values, f) # map(VECTOR, function)
results

## [[1]]
## [1] 5
## 
## [[2]]
## [1] 8
## 
## [[3]]
## [1] 11
## 
## [[4]]
## [1] 14
## 
## [[5]]
## [1] 17

# map() returns a list: each [[i]] entry holds the result for the i-th element of the input vector

purrr map() functions

You can use these variants to return an atomic vector instead of a list:

map_lgl() for logicals
map_int() for integers
map_dbl() for doubles
map_chr() for characters

map_dbl(values, f)

## [1]  5  8 11 14 17

map anonymous function

If a function is only used once, inside the map call, you can pass it in directly as an anonymous function(x).

# watch for the brackets ! 
map_dbl(values, function(x){
  3 * x + 2
})

## [1]  5  8 11 14 17

map a formula

you can type a formula instead of the anonymous function(x)

#  the tilde (~) starts the formula; .x stands for the current element
map_dbl(values, ~ 3 * .x + 2)

## [1]  5  8 11 14 17

map functions with 2 arguments

#  function with 2 arguments
# Linear transform 3 * x + y. When used with map(), `x` receives each element
# and `y` is supplied once as an extra named argument.
f2 <- function(x, y) {
  3 * x + y
}

map_dbl(values, f2, y= 2)

## [1]  5  8 11 14 17

map_dbl(vs, f2, y=3)

## [1]  6  9 18 27 15

map with dataframes

Example of iris dataset, 3 rows

class(iris)

## [1] "data.frame"

head(iris, 3)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

map a dataframe column

map_chr(iris, class)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##    "numeric"    "numeric"    "numeric"    "numeric"     "factor"

Map over the columns with unique(), then count the distinct values in each column with map_dbl(length).

iris %>% 
  map(unique) %>% 
  map_dbl(length)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

map over multiple dataframes

Say you have a folder of .csv files. In this example every file in the folder is named like “ae_attendances/2016-04-01.csv”, differing only in the date. A regex matches the file names: groups of four, two and two digits separated by ‘-’.

# Collect the paths of all daily CSV files (named YYYY-MM-DD.csv) in the
# "ae_attendances" folder. full.names = TRUE keeps the folder prefix on each
# path so the result can be passed straight to a reader function.
files <- dir(
  path = "ae_attendances", # was " " (a single space), which is not the data folder
  pattern = "^\\d{4}-\\d{2}-\\d{2}\\.csv$",
  full.names = TRUE
)

#  map() method using the read_csv()
map(files, read_csv, col_types= "ccddd")

Alternatively, use map_dfr(files, read_csv, col_types="ccddd") to row-bind the results into a single data frame:

# Read every file into one data frame, recording the source path in the
# `filename` column, then derive `period` from the YYYY-MM-DD part of the name.
files %>%
  set_names() %>% # name each path by itself so .id can store it
  map_dfr(read_csv, col_types = "ccddd", .id = "filename") %>%
  mutate(
    # Escaped regular string: a bare r"..." without the r"(...)" delimiters is
    # not a valid R raw string and fails to parse.
    period = str_extract(filename, "\\d{4}-\\d{2}-\\d{2}(?=\\.csv$)") %>%
      lubridate::ymd()
  )

map over 2 vectors

The two vectors need to be the same length (or one of them length 1, which is recycled).

# Define the two input vectors up front; map2_dbl() walks them in parallel,
# passing the i-th elements as .x and .y.
v1 <- 1:4
v2 <- 6:9

# Use the vectors defined above (previously they were defined but unused).
map2_dbl(v1, v2, ~ .x * .y)

## [1]  6 14 24 36

Use pmap() to iterate over more than two vectors in parallel; the vectors are passed as a list.

list(1:3, 4:6, 7:9) %>% 
  pmap_dbl( function(x,y,z) x * y +z)

## [1] 11 18 27

With a named list, elements are matched to the function's arguments by name, so the argument order does not matter.

list(a= 1:3, b= 4:6, c= 7:9) %>% 
  pmap_dbl( function(c,a,b) a * b + c)

## [1] 11 18 27

Functional Programming :: purrr