# mean(is.na(x)) calculates the proportion of NA values in a vector,
# so I name the function na_rate().
# Proportion of missing values in `x`.
#   x: a vector (anything is.na() accepts).
# Returns a single number: the mean of the logical NA mask, i.e. the share of
# elements that are NA.
na_rate <- function(x) {
  is_missing <- is.na(x)
  mean(is_missing)
}
# Two of the seven elements are NA, so the rate is 2 / 7.
na_rate(x = c(1, 2, 3, NA, 4, 5, NA))
## [1] 0.2857143
# x / sum(x, na.rm = TRUE) standardizes a vector so that it sums to one.
# I'll write a function named sum_to_one() that takes the vector to
# standardize, x, and an optional argument, na.rm. The optional argument makes
# the function more expressive, because it can handle NA values in two ways:
# propagating NA to every element (na.rm = FALSE) or ignoring the NAs when
# computing the sum (na.rm = TRUE).
# Rescale `x` so that its elements sum to one.
#   x:     a numeric vector.
#   na.rm: passed to sum(); if TRUE, NA values are ignored when computing the
#          total (they remain NA in the result).
# Returns a vector the same length as `x`.
sum_to_one <- function(x, na.rm = FALSE) {
  total <- sum(x, na.rm = na.rm)
  x / total
}
# No missing values: every element is divided by the total.
sum_to_one(x = 1:5)
## [1] 0.06666667 0.13333333 0.20000000 0.26666667 0.33333333
# One NA with the default na.rm = FALSE: the total is NA, so all results are NA.
sum_to_one(x = c(1:5, NA))
## [1] NA NA NA NA NA NA
# na.rm = TRUE: the NA is ignored in the total but stays NA in the result.
sum_to_one(x = c(1:5, NA), na.rm = TRUE)
## [1] 0.06666667 0.13333333 0.20000000 0.26666667 0.33333333 NA
#sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) calculates the coefficient of variation
# Coefficient of variation: standard deviation divided by the mean.
#   x:     a numeric vector.
#   na.rm: passed to both sd() and mean(); if TRUE, NA values are removed
#          before computing either statistic.
# Returns a single number.
coef_variation <- function(x, na.rm = FALSE) {
  spread <- sd(x, na.rm = na.rm)
  centre <- mean(x, na.rm = na.rm)
  spread / centre
}
# Complete data.
coef_variation(x = 1:5)
## [1] 0.5270463
# An NA with the default na.rm = FALSE gives NA.
coef_variation(x = c(1:5, NA))
## [1] NA
# With na.rm = TRUE the NA is removed, matching the complete-data result.
coef_variation(x = c(1:5, NA), na.rm = TRUE)
## [1] 0.5270463
# Count the positions at which both `x` and `y` are NA.
#   x, y: vectors compared element by element.
# Returns an integer count.
both_na <- function(x, y) {
  missing_in_x <- is.na(x)
  missing_in_y <- is.na(y)
  sum(missing_in_x & missing_in_y)
}
# Only the first position is NA in both vectors.
both_na(x = c(NA, NA, 1, 2), y = c(NA, 1, NA, 2))
## [1] 1
# Positions 1, 5 and 6 are NA in both vectors.
both_na(
  x = c(NA, NA, 1, 2, NA, NA, 1),
  y = c(NA, 1, NA, 2, NA, NA, 1)
)
## [1] 3
# Test whether each element of `string` begins with `prefix`.
#   string: a character vector.
#   prefix: the leading text to look for.
# Returns a logical vector: TRUE where the first nchar(prefix) characters of
# the element equal `prefix`.
f1 <- function(string, prefix) {
  prefix_width <- nchar(prefix)
  leading_chars <- substr(string, 1, prefix_width)
  leading_chars == prefix
}
# Drop the last element of `x`.
#   x: a vector.
# Returns `x` without its final element, or NULL when `x` has at most one
# element.
f2 <- function(x) {
  n <- length(x)
  if (n > 1) {
    x[-n]
  } else {
    NULL
  }
}
# Repeat (or truncate) `y` so that it has the same length as `x`.
#   x: a vector whose length sets the length of the result.
#   y: the values to recycle.
# Returns `y` recycled to length(x).
f3 <- function(x, y) {
  target_length <- length(x)
  rep(y, length.out = target_length)
}
# f1: which strings start with "ab"?
f1(string = c("abc", "abcde", "ad"), prefix = "ab")
## [1] TRUE TRUE FALSE
# f2: everything except the last element.
f2(x = 1:3)
## [1] 1 2
f2(x = 2:5)
## [1] 2 3 4
# f3: recycle y to the length of x.
f3(x = 1:3, y = 4)
## [1] 4 4 4
# Print a greeting appropriate to the hour of `time`.
#   time: a date-time; defaults to the current time from lubridate::now().
# Hours 0-11 print "good morning", 12-16 "good afternoon", and 17-23
# "good evening". Note that the hours just after midnight therefore count as
# morning; whether they should be "evening" instead is an open question.
greet <- function(time = lubridate::now()) {
  hour_of_day <- lubridate::hour(time)
  greeting <- if (hour_of_day >= 17) {
    "good evening"
  } else if (hour_of_day >= 12) {
    "good afternoon"
  } else {
    "good morning"
  }
  print(greeting)
}
# With no argument, greet() uses the current time, so the greeting printed
# depends on when the script is run (it was evening when this was recorded).
greet()
## [1] "good evening"
# ymd_h() is namespace-qualified, as the lubridate calls inside greet() are,
# so these examples run without library(lubridate) being attached.
greet(lubridate::ymd_h("2017-01-08:05"))
## [1] "good morning"
greet(lubridate::ymd_h("2017-01-08:13"))
## [1] "good afternoon"
greet(lubridate::ymd_h("2017-01-08:20"))
## [1] "good evening"
# Bin temperatures into labelled ranges. With right = TRUE each interval is
# closed on the right, (a, b], so a value equal to a break falls in the lower
# bin (e.g. 0 is "freezing", 10 is "cold").
temp <- seq(from = -10, to = 50, by = 5)
cut(
  x = temp,
  breaks = c(-Inf, 0, 10, 20, 30, Inf),
  labels = c("freezing", "cold", "cool", "warm", "hot"),
  right = TRUE
)
## [1] freezing freezing freezing cold cold cool cool warm
## [9] warm hot hot hot hot
## Levels: freezing cold cool warm hot
# To have intervals closed on the left and open on the right, [a, b) (i.e.
# using < for the upper bound), set right = FALSE
# Same binning, but with right = FALSE each interval is closed on the left,
# [a, b), so a value equal to a break falls in the upper bin (e.g. 0 is
# "cold", 30 is "hot").
temp <- seq(from = -10, to = 50, by = 5)
cut(
  x = temp,
  breaks = c(-Inf, 0, 10, 20, 30, Inf),
  labels = c("freezing", "cold", "cool", "warm", "hot"),
  right = FALSE
)
## [1] freezing freezing cold cold cool cool warm warm
## [9] hot hot hot hot hot
## Levels: freezing cold cool warm hot