Functions

Introduction

When to write a function

df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

df$a <- (df$a - min(df$a, na.rm = TRUE)) / 
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) / 
  (max(df$b, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) / 
  (max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) / 
  (max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
(df$a - min(df$a, na.rm = TRUE)) /
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
##  [1] 0.58640452 0.63221953 0.91051498 0.27885759 0.94896964 1.00000000
##  [7] 0.97636226 0.42792992 0.00000000 0.08912878
(df$a - min(df$a, na.rm = TRUE)) /
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
##  [1] 0.58640452 0.63221953 0.91051498 0.27885759 0.94896964 1.00000000
##  [7] 0.97636226 0.42792992 0.00000000 0.08912878
x <- df$a
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
##  [1] 0.58640452 0.63221953 0.91051498 0.27885759 0.94896964 1.00000000
##  [7] 0.97636226 0.42792992 0.00000000 0.08912878
#>  [1] 0.2892677 0.7509271 0.0000000 0.6781686 0.8530656 1.0000000 0.1716402
#>  [8] 0.6107464 0.6116181 0.6008793

rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
##  [1] 0.58640452 0.63221953 0.91051498 0.27885759 0.94896964 1.00000000
##  [7] 0.97636226 0.42792992 0.00000000 0.08912878
#>  [1] 0.2892677 0.7509271 0.0000000 0.6781686 0.8530656 1.0000000 0.1716402
#>  [8] 0.6107464 0.6116181 0.6008793
rescale01 <- function(x) {
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
rescale01(c(0, 5, 10))
## [1] 0.0 0.5 1.0
#> [1] 0.0 0.5 1.0
rescale01(c(-10, 0, 10))
## [1] 0.0 0.5 1.0
#> [1] 0.0 0.5 1.0
rescale01(c(1, 2, 3, NA, 5))
## [1] 0.00 0.25 0.50   NA 1.00
#> [1] 0.00 0.25 0.50   NA 1.00

df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)

x <- c(1:10, Inf)
rescale01(x)
##  [1]   0   0   0   0   0   0   0   0   0   0 NaN
#>  [1]   0   0   0   0   0   0   0   0   0   0 NaN

rescale01 <- function(x) {
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
rescale01(x)
##  [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
##  [8] 0.7777778 0.8888889 1.0000000       Inf
#>  [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
#>  [8] 0.7777778 0.8888889 1.0000000       Inf

Functions are for humans and computers

# Don't do this!
T <- FALSE
c <- 10
mean <- function(x) sum(x)

Conditional execution

has_name <- function(x) {
  nms <- names(x)
  if (is.null(nms)) {
    rep(FALSE, length(x))
  } else {
    !is.na(nms) & nms != ""
  }
}

Condition

identical(0L, 0)
## [1] FALSE
#> [1] FALSE

x <- sqrt(2) ^ 2
x
## [1] 2
#> [1] 2
x == 2
## [1] FALSE
#> [1] FALSE
x - 2
## [1] 4.440892e-16
#> [1] 4.440892e-16

Multiple conditions

Code Style

y <- 10
x <- if (y < 20) "Too low" else "Too high"

if (y < 20) {
  x <- "Too low" 
} else {
  x <- "Too high"
}

Function Arguments

# Compute confidence interval around mean using normal approximation
mean_ci <- function(x, conf = 0.95) {
  se <- sd(x) / sqrt(length(x))
  alpha <- 1 - conf
  mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}

x <- runif(100)
mean_ci(x)
## [1] 46.80940 46.91993
#> [1] 0.4976111 0.6099594
mean_ci(x, conf = 0.99)
## [1] 46.79204 46.93729
#> [1] 0.4799599 0.6276105

Choosing names

Checking values

wt_mean <- function(x, w) {
  sum(x * w) / sum(w)
}
wt_var <- function(x, w) {
  mu <- wt_mean(x, w)
  sum(w * (x - mu) ^ 2) / sum(w)
}
wt_sd <- function(x, w) {
  sqrt(wt_var(x, w))
}

wt_mean <- function(x, w, na.rm = FALSE) {
  if (!is.logical(na.rm)) {
    stop("`na.rm` must be logical")
  }
  if (length(na.rm) != 1) {
    stop("`na.rm` must be length 1")
  }
  if (length(x) != length(w)) {
    stop("`x` and `w` must be the same length", call. = FALSE)
  }
  
  if (na.rm) {
    miss <- is.na(x) | is.na(w)
    x <- x[!miss]
    w <- w[!miss]
  }
  sum(w * x) / sum(w)
}

Dot dot dot

sum(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
## [1] 55
#> [1] 55
stringr::str_c("a", "b", "c", "d", "e", "f")
## [1] "abcdef"
#> [1] "abcdef"

commas <- function(...) stringr::str_c(..., collapse = ", ")
commas(letters[1:10])
## [1] "a, b, c, d, e, f, g, h, i, j"
#> [1] "a, b, c, d, e, f, g, h, i, j"

rule <- function(..., pad = "-") {
  title <- paste0(...)
  width <- getOption("width") - nchar(title) - 5
  cat(title, " ", stringr::str_dup(pad, width), "\n", sep = "")
}
rule("Important output")
## Important output -----------------------------------------------------------
#> Important output -----------------------------------------------------------
x <- c(1, 2)
sum(x, na.mr = TRUE)
## [1] 4
#> [1] 4

Return values

complicated_function <- function(x, y, z) {
  if (length(x) == 0 || length(y) == 0) {
    return(0)
  }
    
  # Complicated code here
}

f <- function() {
  if (x) {
    # Do 
    # something
    # that
    # takes
    # many
    # lines
    # to
    # express
  } else {
    # return something short
  }
}


f <- function() {
  if (!x) {
    return(something_short)
  }

  # Do 
  # something
  # that
  # takes
  # many
  # lines
  # to
  # express
}

writing pipeable functions

show_missings <- function(df) {
  n <- sum(is.na(df))
  cat("Missing values: ", n, "\n", sep = "")
  
  invisible(df)
}

##Environment

f <- function(x) {
  x + y
} 
y <- 100
f(10)
## [1] 110
#> [1] 110

y <- 1000
f(10)
## [1] 1010
#> [1] 1010
`+` <- function(x, y) {
  if (runif(1) < 0.1) {
    sum(x, y)
  } else {
    sum(x, y) * 1.1
  }
}
table(replicate(1000, 1 + 2))
## 
##   3 3.3 
## 102 898
#> 
#>   3 3.3 
#> 100 900
rm(`+`)