Introduction

When Should You Write a Function?

# Create a data frame: four columns, each holding 10 standard-normal draws
tibble::tibble(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)
## # A tibble: 10 × 4
##         a      b      c        d
##     <dbl>  <dbl>  <dbl>    <dbl>
##  1 -0.105  1.09   2.14  -0.0444 
##  2 -1.02  -1.32   0.770 -0.382  
##  3  0.259 -0.146  1.43   1.08   
##  4 -1.51  -0.872 -1.11   0.0212 
##  5 -1.33  -0.606  1.31  -1.23   
##  6 -0.642 -0.418  0.331 -1.50   
##  7 -1.56  -1.19   0.414 -0.0535 
##  8 -0.576 -0.719 -1.89   0.860  
##  9  0.495 -0.148 -0.185 -0.0537 
## 10  1.46  -0.620  0.724  0.00985
# For reproducible work, seed the random number generator *before* drawing.
# Seeding after the rnorm() calls would leave df different on every run.
# NOTE(review): the printed outputs further down were captured from an
# unseeded run; re-knit the document to refresh them.
set.seed(1234)

# Data frame used in the rest of this section: four columns of 10 normal draws
df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Rescale column a to [0, 1]: shift by the minimum, then divide by the spread
# (max - min). The result is only printed here, not stored.
(df$a - min(df$a, na.rm = TRUE)) / diff(range(df$a, na.rm = TRUE))
##  [1] 0.5485509 1.0000000 0.6410150 0.5933291 0.6763798 0.6381797 0.7272041
##  [8] 0.4090590 0.4589032 0.0000000
# Apply the same min-max rescaling to every column, one column at a time.
# Each line is a copy of the previous one with the column name swapped.
df$a <- (df$a - min(df$a, na.rm = TRUE)) / diff(range(df$a, na.rm = TRUE))

df$b <- (df$b - min(df$b, na.rm = TRUE)) / diff(range(df$b, na.rm = TRUE))

df$c <- (df$c - min(df$c, na.rm = TRUE)) / diff(range(df$c, na.rm = TRUE))

df$d <- (df$d - min(df$d, na.rm = TRUE)) / diff(range(df$d, na.rm = TRUE))
# Show the rescaled data frame
df
## # A tibble: 10 × 4
##        a     b     c     d
##    <dbl> <dbl> <dbl> <dbl>
##  1 0.549 0.820 0.479 0.547
##  2 1     0.598 1     0.412
##  3 0.641 0.745 0.275 1    
##  4 0.593 0     0     0.233
##  5 0.676 1     0.552 0.420
##  6 0.638 0.427 0.946 0.482
##  7 0.727 0.619 0.228 0.839
##  8 0.409 0.929 0.180 0.664
##  9 0.459 0.578 0.891 0    
## 10 0     0.612 0.296 0.690
#' Square a value
#'
#' @param var A numeric vector (or anything that supports `*`).
#' @return `var` multiplied by itself, element by element.
square <- function(var) {

  # body
  squared_value <- var * var

  # return value (the original returned a misspelled, undefined name)
  squared_value

}
#' Rescale a numeric vector to the interval [0, 1]
#'
#' Missing values are ignored when finding the minimum and maximum and are
#' left as `NA` in the result.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`; the smallest value maps
#'   to 0 and the largest to 1.
rescale <- function(x) {

  # body
  # Both ends of the range come from x itself, never from a global object,
  # so the function works for any vector passed in.
  rng <- range(x, na.rm = TRUE)

  # return values
  (x - rng[1]) / (rng[2] - rng[1])

}
# Rescale every column through the helper instead of repeating the formula
df[["a"]] <- rescale(df[["a"]])

df[["b"]] <- rescale(df[["b"]])

df[["c"]] <- rescale(df[["c"]])

df[["d"]] <- rescale(df[["d"]])

# Show the rescaled data frame
df
## # A tibble: 10 × 4
##        a     b     c     d
##    <dbl> <dbl> <dbl> <dbl>
##  1 0.549 0.820 0.479 0.547
##  2 1     0.598 1     0.412
##  3 0.641 0.745 0.275 1    
##  4 0.593 0     0     0.233
##  5 0.676 1     0.552 0.420
##  6 0.638 0.427 0.946 0.482
##  7 0.727 0.619 0.228 0.839
##  8 0.409 0.929 0.180 0.664
##  9 0.459 0.578 0.891 0    
## 10 0     0.612 0.296 0.690

Functions are for Humans and Computers

Conditional Execution

#' Report the sign of a number using the three condition types
#'
#' Positive input signals a message, zero signals a warning, and negative
#' input signals an error. In the first two cases the value is printed.
#'
#' @param x A single number; `if` needs a length-one condition.
#' @return The result of `print(x)` for positive or zero input; negative
#'   input raises an error instead of returning.
detect_sign <- function(x) {

  if (x > 0) {
    message("Value is positive")
    print(x)
  } else if (x == 0) {
    warning("Value is not positive, but it can be accepted")
    print(x)
  } else {
    # stop() aborts the call, so nothing placed after it would ever run
    stop("Value is negative, the function must stop")
  }

}

# NOTE(review): %>% is the magrittr pipe; it is assumed to be attached
# earlier in the document -- confirm.
# Positive input takes the first branch: a message, then the value is printed
3 %>% detect_sign()
## Value is positive
## [1] 3
# Zero takes the second branch: a warning, then the value is printed
0 %>% detect_sign()
## Warning in detect_sign(.): Value is not positive, but it can be accepted
## [1] 0

Function Arguments

# Open the help page for mean() to see which arguments it accepts
?mean
## starting httpd help server ... done
# Sample vector with one large value (100) and one missing value
x <- c(1:10, 100, NA)
x
##  [1]   1   2   3   4   5   6   7   8   9  10 100  NA
# Without na.rm = TRUE the missing value propagates into the result
x %>% mean()
## [1] NA
# Drop the NA and trim 10% of the observations from each end before averaging
x %>% mean(na.rm = TRUE, trim = 0.1)
## [1] 6
#' Mean that drops missing values by default
#'
#' Thin wrapper around `mean()` whose `na.rm` default is `TRUE`.
#'
#' @param x Vector passed to `mean()`.
#' @param na.rm Should missing values be removed before averaging?
#' @param ... Further arguments forwarded to `mean()`, e.g. `trim`.
#' @return The value returned by `mean()`.
mean_remove_na <- function(x, na.rm = TRUE, ...) {
  mean(x, na.rm = na.rm, ...)
}

# Default na.rm = TRUE: the NA is dropped before averaging
x %>% mean_remove_na()
## [1] 14.09091
# The caller can still override the default
x %>% mean_remove_na(na.rm = FALSE)
## [1] NA
# Extra arguments such as trim are forwarded to mean() through ...
x %>% mean_remove_na(trim = 0.1)
## [1] 6

Two Types of Functions