Introduction

When should you write a function?

# Cleaner function using range
# Rescale a vector linearly so that its minimum maps to 0 and its maximum
# maps to 1.
#
# x: numeric vector. NA values are skipped when finding the range and remain
#    NA in the result.
# Returns a vector of the same length as x.
rescale01 <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  lower <- bounds[1]
  span <- bounds[2] - lower
  (x - lower) / span
}
# Two inputs with different ranges both map onto 0, 0.5, 1
rescale01(c(0, 5, 10))
## [1] 0.0 0.5 1.0
rescale01(c(-10, 0, 10))
## [1] 0.0 0.5 1.0
# Handle infinite values
# Test vector ending in Inf, used with the redefinition below that passes
# finite = TRUE to range()
x <- c(1:10, Inf)
# Rescale a vector to [0, 1] using only its finite values to find the range.
#
# x: numeric vector. NA and infinite values are ignored when computing the
#    range (finite = TRUE); they are still passed through the arithmetic.
# Returns a vector of the same length as x.
rescale01 <- function(x) {
  finite_bounds <- range(x, na.rm = TRUE, finite = TRUE)
  shifted <- x - finite_bounds[1]
  shifted / diff(finite_bounds)
}
# The finite values are scaled to [0, 1]; the Inf element comes out as Inf
rescale01(x)
##  [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
##  [8] 0.7777778 0.8888889 1.0000000       Inf

When should you write a function?

# For reproducible work: fix the random seed so rnorm() draws the same values
set.seed(1234)

# Create a data frame with four columns of 10 standard-normal draws each
df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Rescale each column to [0, 1]: (value - min) / (max - min).
# The same formula is written out once per column; the rescale() function
# defined further down in this file replaces this repetition.
df$a <- (df$a - min(df$a, na.rm = TRUE)) /
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
  (max(df$b, na.rm = TRUE) - min(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
  (max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
  (max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))

# Print the rescaled data frame
df
## # A tibble: 10 × 4
##        a      b     c     d
##    <dbl>  <dbl> <dbl> <dbl>
##  1 0.332 0.153  0.782 1    
##  2 0.765 0      0.473 0.519
##  3 1     0.0651 0.498 0.448
##  4 0     0.311  0.943 0.511
##  5 0.809 0.573  0.373 0.168
##  6 0.831 0.260  0     0.308
##  7 0.516 0.143  1     0    
##  8 0.524 0.0255 0.210 0.256
##  9 0.519 0.0472 0.708 0.575
## 10 0.424 1      0.253 0.522
# Rescale a numeric vector to the interval [0, 1].
#
# x: numeric vector. NA values are ignored when finding the range and remain
#    NA in the result.
# Returns a vector of the same length as x, with the minimum mapped to 0 and
# the maximum mapped to 1.
rescale <- function(x) {

  # body: find the minimum and maximum once rather than calling min() twice
  rng <- range(x, na.rm = TRUE)

  # return value: the last evaluated expression is returned
  (x - rng[1]) / (rng[2] - rng[1])

}
# Apply rescale() to each column. The columns were already scaled to [0, 1]
# by the copy-pasted code above, so the printed values are unchanged.
df$a <- rescale(df$a)
df$b <- rescale(df$b)
df$c <- rescale(df$c)
df$d <- rescale(df$d)

# Print the result
df
## # A tibble: 10 × 4
##        a      b     c     d
##    <dbl>  <dbl> <dbl> <dbl>
##  1 0.332 0.153  0.782 1    
##  2 0.765 0      0.473 0.519
##  3 1     0.0651 0.498 0.448
##  4 0     0.311  0.943 0.511
##  5 0.809 0.573  0.373 0.168
##  6 0.831 0.260  0     0.308
##  7 0.516 0.143  1     0    
##  8 0.524 0.0255 0.210 0.256
##  9 0.519 0.0472 0.708 0.575
## 10 0.424 1      0.253 0.522

Functions are for humans and computers

Conditional execution

# Report the sign of a single number using R's condition system.
#
# x: a single number; the if() conditions need a length-one, non-NA value.
# Behaviour:
#   x > 0   emits a message, then prints x
#   x == 0  raises a warning, then prints x
#   x < 0   stops with an error
# In the first two cases the function's value is that of print(x), which
# returns x invisibly.
detect_sign <- function(x) {

  if (x > 0) {
    message("Value is positive")
    print(x)
  } else if (x == 0) {
    warning("Value is not positive, but it can be accepted")
    print(x)
  } else {
    # stop() aborts the call, so nothing placed after it would ever run
    stop("Value is negative, the function must stop")
  }

}

# NOTE(review): %>% is not defined in the visible code; presumably magrittr
# or dplyr is attached elsewhere. Confirm.
# Positive input: a message is emitted and the value is printed
3 %>% detect_sign()
## Value is positive
## [1] 3
# Zero: a warning is raised but the value is still printed
0 %>% detect_sign()
## Warning in detect_sign(.): Value is not positive, but it can be accepted
## [1] 0

Function arguments

# Open the help page for mean() to see its arguments
?mean

# Sample data with one large value (100) and one missing value
x <- c(1:10, 100, NA)
x
##  [1]   1   2   3   4   5   6   7   8   9  10 100  NA
# With the default arguments the NA propagates to the result
x %>% mean()
## [1] NA
# na.rm = TRUE drops the NA before averaging
x %>% mean(na.rm = TRUE)
## [1] 14.09091
# trim = 0.1 additionally trims a fraction of observations from each end,
# which removes the influence of the value 100 here
x %>% mean(na.rm = TRUE, trim = 0.1)
## [1] 6
# Mean of x with missing values removed by default.
#
# x:     vector passed to mean().
# na.rm: whether to drop NA values before averaging; defaults to TRUE.
# ...:   further arguments forwarded unchanged to mean() (e.g. trim).
# Returns whatever mean() returns for these arguments.
mean_remove_na <- function(x, na.rm = TRUE, ...) {

  mean(x, na.rm = na.rm, ...)

}

# Default: NA values are removed
x %>% mean_remove_na()
## [1] 14.09091
# The default can be overridden, in which case the NA propagates
x %>% mean_remove_na(na.rm = FALSE)
## [1] NA
# Extra arguments such as trim are forwarded to mean() through ...
x %>% mean_remove_na(trim = 0.1)
## [1] 6

Two types of functions

  • one that takes a vector as the input
  • another that takes a data frame as the input

Return values

# Early return
# Skeleton showing an early return.
#
# x, y: inputs whose lengths are checked; if either is empty the function
#       returns 0 immediately.
# z:    accepted but not used in the visible body.
# When neither x nor y is empty, the if() has no else branch and the
# function yields NULL invisibly.
complicated_function <- function(x, y, z) {
  any_empty <- length(x) == 0 || length(y) == 0
  if (any_empty) {
    return(0)
  }
}

# Pipeable function
# Print the number of missing values in df, then hand df back invisibly so
# the call can sit in the middle of a pipeline.
#
# df: a data frame (or any object is.na() accepts).
# Returns df, invisibly.
show_missings <- function(df) {
  missing_count <- sum(is.na(df))
  report <- paste0("Missing values: ", missing_count, "\n")
  cat(report)
  invisible(df)
}

# Called directly: only the count is printed, because df is returned invisibly
show_missings(mtcars)
## Missing values: 0
# Used inside a pipeline: each call reports the count at that stage.
# NOTE(review): %>% and mutate() are not defined in the visible code;
# presumably dplyr is attached elsewhere. Confirm.
mtcars %>%
  show_missings() %>%
  mutate(mpg = ifelse(mpg < 20, NA, mpg)) %>%
  show_missings()
## Missing values: 0
## Missing values: 18