# When Should You Write a Function? ----
# For reproducible work: fix the random-number seed so the rnorm() draws
# below, and therefore the printed values, are the same on every run
set.seed(1234)
# Create a data frame (a tibble) with four columns, a through d, each
# holding 10 random values drawn with rnorm()
df <- tibble::tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
# Print the tibble; the lines starting with "##" are the console output
df
## # A tibble: 10 × 4
## a b c d
## <dbl> <dbl> <dbl> <dbl>
## 1 -1.21 -0.477 0.134 1.10
## 2 0.277 -0.998 -0.491 -0.476
## 3 1.08 -0.776 -0.441 -0.709
## 4 -2.35 0.0645 0.460 -0.501
## 5 0.429 0.959 -0.694 -1.63
## 6 0.506 -0.110 -1.45 -1.17
## 7 -0.575 -0.511 0.575 -2.18
## 8 -0.547 -0.911 -1.02 -1.34
## 9 -0.564 -0.837 -0.0151 -0.294
## 10 -0.890 2.42 -0.936 -0.466
# Min-max rescale every column to [0, 1]: subtract the column minimum, then
# divide by the column's span (max - min), ignoring missing values
df$a <- (df$a - min(df$a, na.rm = TRUE)) / diff(range(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) / diff(range(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) / diff(range(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) / diff(range(df$d, na.rm = TRUE))
#' Rescale a vector to the interval [0, 1]
#'
#' Applies min-max normalisation: the smallest value maps to 0 and the
#' largest maps to 1.
#'
#' @param x A numeric vector.
#' @param na.rm Should `NA`s be ignored when finding the minimum and maximum?
#'   Defaults to `TRUE`. With `FALSE`, any `NA` in `x` makes every element of
#'   the result `NA`.
#' @return A vector the same length as `x`. `NA` elements of `x` stay `NA`.
#'   If all non-missing values are equal the span is zero, so those elements
#'   come back as `NaN` (0 / 0).
rescale <- function(x, na.rm = TRUE) {
  # Look up both bounds once instead of evaluating min() twice
  bounds <- range(x, na.rm = na.rm)
  (x - bounds[1]) / (bounds[2] - bounds[1])
}
# Apply rescale() to each of the four columns in a single step
df[c("a", "b", "c", "d")] <- lapply(df[c("a", "b", "c", "d")], rescale)
# Print the rescaled tibble; the "##" lines are the console output
df
## # A tibble: 10 × 4
## a b c d
## <dbl> <dbl> <dbl> <dbl>
## 1 0.332 0.153 0.782 1
## 2 0.765 0 0.473 0.519
## 3 1 0.0651 0.498 0.448
## 4 0 0.311 0.943 0.511
## 5 0.809 0.573 0.373 0.168
## 6 0.831 0.260 0 0.308
## 7 0.516 0.143 1 0
## 8 0.524 0.0255 0.210 0.256
## 9 0.519 0.0472 0.708 0.575
## 10 0.424 1 0.253 0.522
# Functions Are for Humans and Computers ----
#' Add ten to a number
#'
#' @param num A numeric value (or vector of values).
#' @return `num` plus 10.
add_ten <- function(num) {
  num + 10
}
# Example calls; the "##" lines are the console output
add_ten(18)
## [1] 28
add_ten(1200)
## [1] 1210
# Conditional Execution ----
#' Report whether a value is at least 5
#'
#' Prints a message saying which side of the threshold `i` falls on, then
#' returns `i` unchanged.
#'
#' @param i A single, non-missing number. It is used directly as an `if ()`
#'   condition, so it is not vectorised.
#' @return `i`, unchanged.
basic <- function(i) {
  # Pick the message first so there is one print and one exit point
  msg <- if (i >= 5) "Greater than or = to 5" else "Less than 5"
  print(msg)
  i
}
# Try one value on each side of the threshold; "##" lines are console output
basic(6)
## [1] "Greater than or = to 5"
## [1] 6
basic(2)
## [1] "Less than 5"
## [1] 2
# Function Arguments ----
# A sample vector containing an outlier (100) and a missing value
x <- c(seq_len(10), 100, NA)
x
## [1] 1 2 3 4 5 6 7 8 9 10 100 NA
# With the default arguments a single NA makes the mean NA
mean(x)
## [1] NA
# na.rm = TRUE drops missing values before averaging
mean(x, na.rm = TRUE)
## [1] 14.09091
# trim = 0.1 additionally trims 10% of the observations from each end
mean(x, trim = 0.1, na.rm = TRUE)
## [1] 6
#' Mean that ignores missing values by default
#'
#' A thin wrapper around `mean()` whose `na.rm` argument defaults to `TRUE`.
#'
#' @param x Values to average; passed to `mean()` as its first argument.
#' @param na.rm Should `NA`s be removed before averaging? Defaults to `TRUE`.
#' @param ... Further arguments forwarded to `mean()`, e.g. `trim`.
#' @return The value returned by `mean()`.
mean_remove_na <- function(x, na.rm = TRUE, ...) {
  mean(x, na.rm = na.rm, ...)
}
# Default behaviour: missing values are dropped
mean_remove_na(x)
## [1] 14.09091
# Explicitly keep missing values, so the result is NA
mean_remove_na(x, na.rm = FALSE)
## [1] NA
# Extra arguments such as trim are forwarded to mean() through ...
mean_remove_na(x, trim = 0.1)
## [1] 6