install.packages("tidyverse", repos = "http://cran.us.r-project.org", dependencies = TRUE)
library(tidyverse)
What we do:
Review the basic structure of a user-defined function
Learn how to use conditions in the body of a function
Write a function whenever you’ve copied and pasted a block of code more than twice.
# Example data: a tibble with four columns (a-d), each holding 10 random
# draws from the standard normal distribution. No seed is set, so the values
# differ from run to run.
df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Rescale each column to the [0, 1] range: subtract the column minimum and
# divide by the column's spread (max - min), ignoring NA values.
# Every denominator must use the same column as its numerator; mixing columns
# is an easy copy-and-paste slip in repetitive code like this.
df$a <- (df$a - min(df$a, na.rm = TRUE)) /
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
  (max(df$b, na.rm = TRUE) - min(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
  (max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
  (max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
Count the number of inputs.
Rewrite the code with temporary variables with general names.
# Pull one column into a generically named temporary so the formula no
# longer mentions a specific column.
x <- df[["a"]]
(x - min(x, na.rm = TRUE)) /
  (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))

# Compute the minimum and maximum once and keep them under a descriptive name.
rng <- range(x, na.rm = TRUE) # c(min, max)
(x - rng[[1]]) / (rng[[2]] - rng[[1]])
# Linearly rescale a numeric vector so that its smallest value maps to 0 and
# its largest to 1. NA values are ignored when finding the bounds and stay NA
# in the result.
rescale01 <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  lower <- bounds[1]
  upper <- bounds[2]
  (x - lower) / (upper - lower)
}

# Quick check on a simple input.
rescale01(c(0, 5, 10))
Pick a name for your function.
List the inputs, or arguments, to the function inside function().
Put the code in the body of the function.
Start with working code and turn it into a function.
# Apply the same rescaling to every column through the single shared function.
df[["a"]] <- rescale01(df[["a"]])
df[["b"]] <- rescale01(df[["b"]])
df[["c"]] <- rescale01(df[["c"]])
df[["d"]] <- rescale01(df[["d"]])
If our requirements change, we only need to change the code in one place.
# An input that contains an infinite value.
x <- c(seq_len(10), Inf)
rescale01(x)
# Linearly rescale a numeric vector to [0, 1] using only its finite values to
# determine the bounds, so infinite elements do not distort the range.
rescale01 <- function(x) {
  bounds <- range(x, na.rm = TRUE, finite = TRUE)
  span <- bounds[2] - bounds[1]
  (x - bounds[1]) / span
}
# Re-run on the vector that contains Inf, now using the redefined rescale01().
rescale01(x)
A few tricks for better function names
Use verbs instead of nouns, unless nouns are well known (e.g., mean)
Use several words in lowercase and connect them by underscore (_). More importantly, be consistent.
If you create several functions doing similar things, use a common prefix.
# some random functions
# Illustrative names only: they share the `input_` prefix to show the
# common-prefix convention.
# NOTE(review): none of these functions are defined in the visible part of
# this file, so running these calls as written would presumably fail unless
# something else provides them — confirm.
input_select()
input_checkbox()
input_text()
Avoid function names that are already in use by other packages.
Use # comments in the body of functions to explain why, not what or how.
A function returns the last value it computed.
# For each element of `x`, report whether it carries a usable name: TRUE when
# the name is neither missing (NA) nor the empty string, FALSE otherwise.
# An object without any names attribute yields all FALSE.
has_name <- function(x) {
  labels <- names(x)
  if (is.null(labels)) {
    # No names at all: nothing is named.
    return(logical(length(x)))
  }
  labels != "" & !is.na(labels)
}
# example: every element has a name
x <- c(a = 1, b = 2, c = 3)
has_name(x)
# example: the second name is missing (NA)
names(x)[2] <- NA
has_name(x)
The condition should be a single value, not a vector. Use || or &&, not | or &. If you have multiple elements, use any() or all().
Check: any, all
# Collapse a logical vector to a single TRUE/FALSE.
x <- c(TRUE, FALSE, TRUE)
any(x) # is at least one element TRUE?
all(x) # is every element TRUE?
Avoid == in the condition. It is vectorized, so it may return a vector with more than one element. Instead, collapse the result with any() or all(), or use identical(), which always returns a single TRUE or FALSE.
# identical() may be too strict. It doesn't coerce.
# Here an integer is compared with a double, so the types differ.
identical(0L, 0) # 0L: integer
# then, use dplyr::near()
# Floating-point demonstration: squaring sqrt(2) is subject to rounding, so the
# result need not be exactly 2. The lines below show the value, the exact
# comparison, the difference from 2, the strict identical() check, and finally
# the tolerance-based comparison with dplyr::near().
x <- sqrt(2)^2
x
x == 2
x - 2
identical(x, 2)
dplyr::near(x, 2)
Chain multiple if statements together:
# Template for chaining conditions. `this` and `that` are placeholders for
# conditions, not objects defined in the visible part of this file.
# Branches are tried in order and only the first one whose condition is TRUE
# runs; the final `else` covers every remaining case.
if (this) {
# do this
} else if (that) {
# do something else
} else {
# fallback when neither condition holds
}
If an if statement is very short, write it on one line without curly brackets.
# A short if/else fits on one line without braces and can be used as an
# expression whose value is assigned directly.
y <- 10
x <- if (y < 20) "Too low" else "Too high"
No.4 How could you use cut() to simplify this set of nested if-else statements?
# Exercise snippet: map a temperature to a descriptive label.
# NOTE(review): `temp` is not defined in the visible part of this file; the
# reader presumably supplies a single number — confirm.
# Conditions are tested top to bottom, so a branch is only reached when every
# earlier threshold was exceeded (e.g. "cold" means 0 < temp <= 10).
if (temp <= 0) {
"freezing"
} else if (temp <= 10) {
"cold"
} else if (temp <= 20) {
"cool"
} else if (temp <= 30) {
"warm"
} else {
# anything above 30
"hot"
}
How would you change the call to cut() if I’d used < instead of <=? What is the other chief advantage of cut() for this problem? (Hint: what happens if you have many values in temp?)
No.5 What happens if you use switch() with numeric values?
No.6 What does this switch() call do? What happens if x is “e”?
# Exercise snippet: dispatch on the value of `x`.
# `a = ,` and `c = ,` are empty alternatives (a name with no value). See
# ?switch for how empty alternatives and unmatched values are handled; working
# that out is the point of the exercise.
switch(x,
a = ,
b = "ab",
c = ,
d = "cd"
)
Experiment, then carefully read the documentation.