Course notes for Writing Functions in R
return() expression# Define ratio() function
ratio <- function(x, y) {
x / y
}
# Call ratio() with arguments 3 and 4
ratio(3,4)## [1] 0.75
# Rewrite the call to follow best practices
mean(c(1:9, NA), trim = 0.1, na.rm = TRUE)## [1] 5
f <- function(x) {
if (TRUE) {
return(x + 1)
}
x
}
f(2)## [1] 3
y <- 10
f <- function(x) {
x + y
}
f(10)## [1] 20
y <- 10
f <- function(x) {
y <- 5
x + y
}
f(10)## [1] 15
f <- function(x) {
y <- 5
x + y
}
f(5)## [1] 10
NULL often used to indicate the absence of a vectorNA used to indicate the absence of a value in a vector, aka a missing valuesum on a vector with NA will result in NA.[, [[, or $[ extracts a sublist[[ and $ extract elements, remove a level of hierarchy# 2nd element in tricky_list
typeof(tricky_list[[2]])## [1] "logical"
# Element called x in tricky_list
typeof(tricky_list[['x']])## [1] "list"
# 2nd element inside the element called x in tricky_list
typeof(tricky_list[['x']][[2]])## [1] "character"
# Guess where the regression model is stored
names(tricky_list)## [1] "nums" "y" "x" "model"
# Use names() and str() on the model element
names(tricky_list$model)## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
str(tricky_list$model)## List of 12
## $ coefficients : Named num [1:2] 37.29 -5.34
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## $ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.18 1.05
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 30
## $ xlevels : Named list()
## $ call :List of 12
## ..$ coefficients : Named num [1:2] 37.29 -5.34
## .. ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## ..$ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## .. ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## ..$ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## .. ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## ..$ rank : int 2
## ..$ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## .. ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## ..$ assign : int [1:2] 0 1
## ..$ qr :List of 5
## .. ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. .. ..- attr(*, "assign")= int [1:2] 0 1
## .. ..$ qraux: num [1:2] 1.18 1.05
## .. ..$ pivot: int [1:2] 1 2
## .. ..$ tol : num 1e-07
## .. ..$ rank : int 2
## .. ..- attr(*, "class")= chr "qr"
## ..$ df.residual : int 30
## ..$ xlevels : Named list()
## ..$ call : language lm(formula = mpg ~ wt, data = mtcars)
## ..$ terms :Classes 'terms', 'formula' language mpg ~ wt
## .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. ..$ : chr "wt"
## .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## ..$ model :'data.frame': 32 obs. of 2 variables:
## .. ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## .. ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## .. ..- attr(*, "terms")=Classes 'terms', 'formula' language mpg ~ wt
## .. .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. .. ..$ : chr "wt"
## .. .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. .. ..- attr(*, "order")= int 1
## .. .. .. ..- attr(*, "intercept")= int 1
## .. .. .. ..- attr(*, "response")= int 1
## .. .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## ..- attr(*, "class")= chr "lm"
## $ terms :Class 'formula' language mpg ~ wt
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## $ model :'data.frame': 32 obs. of 2 variables:
## ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..- attr(*, "terms")=Class 'formula' language mpg ~ wt
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "class")= chr "lm"
# Subset the coefficients element
tricky_list$model$coefficients## (Intercept) wt
## 37.285126 -5.344472
# Subset the wt element
tricky_list$model$coefficients[['wt']]## [1] -5.344472
seq_along()# Replace the 1:ncol(df) sequence
for (i in seq_along(df1)) {
print(median(df1[[i]]))
}## [1] 0.7389042
## [1] -1.098235
## [1] -0.0626907
## [1] 0.4441366
# Change the value of df
df <- data.frame()
# Repeat for loop to verify there is no error
for (i in seq_along(df)) {
print(median(df[[i]]))
}# Create new double vector: output
output <- vector("double", ncol(df1))
# Alter the loop
for (i in seq_along(df1)) {
# Change code to store result in output
output[i] <- median(df1[[i]])
}
# Print output
output## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Define example vector x
x <- 1:10
# Rewrite this snippet to refer to x
(x - min(x, na.rm = TRUE)) /
(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vector x
x <- 1:10
# Define rng
rng <- range(x, na.rm = T)
# Rewrite this snippet to refer to the elements of rng
(x - rng[1]) /
(rng[2] - rng[1])## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vector x
x <- 1:10
# Use the function template to create the rescale01 function
rescale01 <- function(x) {
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
}
# Test your function, call rescale01 using the vector x as the argument
rescale01(x)## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vectors x and y
x <- c( 1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# Count how many elements are missing in both x and y
sum(is.na(x) & is.na(y))## [1] 1
# Define example vectors x and y
x <- c( 1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# Turn this snippet into a function: both_na()
both_na <- function(x,y) {
sum(is.na(x) & is.na(y))
}# Define x, y1 and y2
x <- c(NA, NA, NA)
y1 <- c( 1, NA, NA)
y2 <- c( 1, NA, NA, NA)
# Call both_na on x, y1
both_na(x, y1)## [1] 2
# Call both_na on x, y2
both_na(x, y2)## [1] 3
c, mean )impute_mising, collapse_yearsf2 <- function(x) {
if (length(x) <= 1) return(NULL)
x[-length(x)]
}remove_lastc is already they name of a functionnums is non conventional# Rewrite mean_ci to take arguments named level and x, rather then c and nums,
mean_ci <- function(level, x) {
se <- sd(x) / sqrt(length(x))
alpha <- 1 - level
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}# Alter the arguments to mean_ci
mean_ci <- function(x, level = .95) {
se <- sd(x) / sqrt(length(x))
alpha <- 1 - level
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}# Alter the mean_ci function
# Edit the mean_ci function using an if statement to check for the case when x is empty and if so, to produce the same warning as the code above then immediately return() c(-Inf, Inf).
mean_ci <- function(x, level = 0.95) {
if (length(x) == 0) {
warning("x was empty", call. = FALSE)
return(c(-Inf, Inf))
}
se <- sd(x) / sqrt(length(x))
alpha <- 1 - level
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}
mean_ci(numeric(0))## [1] -Inf Inf
f <- function(x, y) {
x[is.na(x)] <- y
cat(sum(is.na(x)), y, "\n")
x
}
# Define a numeric vector x with the values 1, 2, NA, 4 and 5
x <- c(1, 2, NA, 4, 5)
# Call f() with the arguments x = x and y = 3
f(x = x, y = 3)## 0 3
## [1] 1 2 3 4 5
# Call f() with the arguments x = x and y = 10
f(x = x, y = 10)## 0 10
## [1] 1 2 10 4 5
NAs in x with the value given as y# Rename the function f() to replace_missings()
replace_missings <- function(x, replacement) {
# Change the name of the y argument to replacement
x[is.na(x)] <- replacement
cat(sum(is.na(x)), replacement, "\n")
x
}
# Rewrite the call on df$z to match our new names
df <- structure(list(z = c(0.473582631011786, -0.371809943354702, -0.88562980458499,
-0.768223158006804, 0.886983968322944, 0.238444716245814, -1.13439205742083,
NA, NA, -1.27240097583594)), .Names = "z", row.names = c(NA,
-10L), class = "data.frame")
df$z <- replace_missings(df$z, 0)## 0 0
df$z## [1] 0.4735826 -0.3718099 -0.8856298 -0.7682232 0.8869840 0.2384447
## [7] -1.1343921 0.0000000 0.0000000 -1.2724010
replace_missings <- function(x, replacement) {
# Define is_miss
is_miss <- is.na(x)
# Rewrite rest of function to refer to is_miss
x[is_miss] <- replacement
cat(sum(is_miss), replacement, "\n")
x
}replace_missings <- function(x, replacement) {
is_miss <- is.na(x)
x[is_miss] <- replacement
# Rewrite to use message()
message(sum(is_miss), " missings replaced by the value ", replacement)
x
}
# Check your new function by running on df$z
replace_missings(df$z, 0)## numeric(0)
# Initialize output vector
output <- vector("double", ncol(df1))
# Fill in the body of the for loop
for (i in seq_along(df1)) {
output[i] <- median(df1[[i]])
}
# View the result
output## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Turn this code into col_median()
output <- vector("double", ncol(df))
col_median <- function(df) {
for (i in seq_along(df)) {
output[[i]] <- median(df[[i]])
}
output
}
col_median(df1)## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Create col_mean() function to find column means
col_mean <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[[i]] <- mean(df[[i]])
}
output
}
col_mean(df1)## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
# Define col_sd() function
col_sd <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[[i]] <- sd(df[[i]])
}
output
}
col_sd(df1)## [1] 1.2302953 0.9066193 0.7034230 1.2889431
# Add a second argument called power
f <- function(x, power) {
# Edit the body to return absolute deviations raised to power
abs(x - mean(x)) ^ power
}col_summary <- function(df, fun) {
output <- vector("numeric", length(df))
for (i in seq_along(df)) {
output[[i]] <- fun(df[[i]])
}
output
}
# Find the column medians using col_median() and col_summary()
col_median(df1)## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
col_summary(df1, fun = median)## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Find the column means using col_mean() and col_summary()
col_mean(df1)## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
col_summary(df1, fun = mean)## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
# Find the column IQRs using col_summary()
col_summary(df1, fun = IQR)## [1] 2.0399632 0.9621661 0.5023385 1.8777572
.x.f to each elementmap returns a listmap_dbl returns a vector of doublesmap_lgl returns a vector of logicalsmap_int same for integersmap_chr same for characters.f# Load the purrr package
library(purrr)
head(df)## z
## 1 0.4735826
## 2 -0.3718099
## 3 -0.8856298
## 4 -0.7682232
## 5 0.8869840
## 6 0.2384447
# Use map_dbl() to find column means
map_dbl(df, mean)## z
## -0.2833445
# Use map_dbl() to column medians
map_dbl(df, median)## z
## -0.185905
# Use map_dbl() to find column standard deviations
map_dbl(df, sd)## z
## 0.7213903
head(planes)## year engines seats speed
## 1 1956 4 102 232
## 2 1975 1 4 108
## 3 1977 2 139 432
## 4 1996 2 142 NA
## 5 2010 2 20 NA
## 6 NA 1 2 NA
# Find the mean of each column
map_dbl(planes, mean)## year engines seats speed
## NA 2.00000 68.16667 NA
# Find the mean of each column, excluding missing values
map_dbl(planes, mean, na.rm = T)## year engines seats speed
## 1982.80000 2.00000 68.16667 257.33333
# Find the 5th percentile of each column, excluding missing values
map_dbl(planes, quantile, probs = .05, na.rm = T)## year engines seats speed
## 1959.8 1.0 2.5 120.4
# Find the columns that are numeric
map_lgl(df3, is.numeric)## A B C D
## TRUE FALSE TRUE TRUE
# Find the type of each column
map_chr(df3, typeof)## A B C D
## "double" "character" "integer" "double"
# Find a summary of each column
map(df3, summary)## $A
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4039 -0.1372 0.1694 0.1854 0.5858 1.4211
##
## $B
## Length Class Mode
## 10 character character
##
## $C
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 5.50 5.50 7.75 10.00
##
## $D
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4324 -0.6868 -0.2348 -0.1268 0.6237 1.2318
## An existing function
map(df1, summary)## $a
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.7702 -0.9379 0.7389 0.2136 1.1021 1.4268
##
## $b
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1469 -1.5504 -1.0982 -0.9563 -0.5882 0.8299
##
## $c
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.20476 -0.33035 -0.06269 -0.06995 0.17199 1.22003
##
## $d
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.3099 -0.4489 0.4441 0.5187 1.4289 2.7267
## An existing funciton you defined
map(df1, rescale01)## $a
## [1] 0.9763800 0.8193699 0.9247780 0.6621110 1.0000000 0.1264128 0.0000000
## [8] 0.1263912 0.8072770 0.7623936
##
## $b
## [1] 1.0000000 0.3553432 0.3492247 0.1752596 0.5239921 0.5224524 0.7258844
## [8] 0.0718125 0.0000000 0.2757911
##
## $c
## [1] 0.4703795 0.0000000 0.5982757 0.3647366 0.4716139 0.7982322 0.4762885
## [8] 0.1412718 1.0000000 0.3592357
##
## $d
## [1] 0.2366551 0.2055043 0.6105489 1.0000000 0.7836893 0.1233262 0.7011169
## [8] 0.4286287 0.0000000 0.4404229
## An anonymous function defined on the fly
map(df1, function(x) sum(is.na(x)))## $a
## [1] 0
##
## $b
## [1] 0
##
## $c
## [1] 0
##
## $d
## [1] 0
## An anonymous function defined using a formula shortcut
map(df1, ~ sum(is.na(.)))## $a
## [1] 0
##
## $b
## [1] 0
##
## $c
## [1] 0
##
## $d
## [1] 0
.f is [[list_of_results <- list(
list(a = 1, b = "A"),
list(a = 2, b = "C"),
list(a = 3, b = "D")
)
map_dbl(list_of_results, function(x) x[["a"]])## [1] 1 2 3
map_dbl(list_of_results, "a")## [1] 1 2 3
map_dbl(list_of_results, 1)## [1] 1 2 3
# Examine the structure of cyl
str(cyl)## List of 3
## $ 4:'data.frame': 11 obs. of 11 variables:
## ..$ mpg : num [1:11] 22.8 24.4 22.8 32.4 30.4 33.9 21.5 27.3 26 30.4 ...
## ..$ cyl : num [1:11] 4 4 4 4 4 4 4 4 4 4 ...
## ..$ disp: num [1:11] 108 146.7 140.8 78.7 75.7 ...
## ..$ hp : num [1:11] 93 62 95 66 52 65 97 66 91 113 ...
## ..$ drat: num [1:11] 3.85 3.69 3.92 4.08 4.93 4.22 3.7 4.08 4.43 3.77 ...
## ..$ wt : num [1:11] 2.32 3.19 3.15 2.2 1.61 ...
## ..$ qsec: num [1:11] 18.6 20 22.9 19.5 18.5 ...
## ..$ vs : num [1:11] 1 1 1 1 1 1 1 1 0 1 ...
## ..$ am : num [1:11] 1 0 0 1 1 1 0 1 1 1 ...
## ..$ gear: num [1:11] 4 4 4 4 4 4 3 4 5 5 ...
## ..$ carb: num [1:11] 1 2 2 1 2 1 1 1 2 2 ...
## $ 6:'data.frame': 7 obs. of 11 variables:
## ..$ mpg : num [1:7] 21 21 21.4 18.1 19.2 17.8 19.7
## ..$ cyl : num [1:7] 6 6 6 6 6 6 6
## ..$ disp: num [1:7] 160 160 258 225 168 ...
## ..$ hp : num [1:7] 110 110 110 105 123 123 175
## ..$ drat: num [1:7] 3.9 3.9 3.08 2.76 3.92 3.92 3.62
## ..$ wt : num [1:7] 2.62 2.88 3.21 3.46 3.44 ...
## ..$ qsec: num [1:7] 16.5 17 19.4 20.2 18.3 ...
## ..$ vs : num [1:7] 0 0 1 1 1 1 0
## ..$ am : num [1:7] 1 1 0 0 0 0 1
## ..$ gear: num [1:7] 4 4 3 3 4 4 5
## ..$ carb: num [1:7] 4 4 1 1 4 4 6
## $ 8:'data.frame': 14 obs. of 11 variables:
## ..$ mpg : num [1:14] 18.7 14.3 16.4 17.3 15.2 10.4 10.4 14.7 15.5 15.2 ...
## ..$ cyl : num [1:14] 8 8 8 8 8 8 8 8 8 8 ...
## ..$ disp: num [1:14] 360 360 276 276 276 ...
## ..$ hp : num [1:14] 175 245 180 180 180 205 215 230 150 150 ...
## ..$ drat: num [1:14] 3.15 3.21 3.07 3.07 3.07 2.93 3 3.23 2.76 3.15 ...
## ..$ wt : num [1:14] 3.44 3.57 4.07 3.73 3.78 ...
## ..$ qsec: num [1:14] 17 15.8 17.4 17.6 18 ...
## ..$ vs : num [1:14] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ am : num [1:14] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ gear: num [1:14] 3 3 3 3 3 3 3 3 3 3 ...
## ..$ carb: num [1:14] 2 4 3 3 3 4 4 4 2 2 ...
# Extract the first element into four_cyls
four_cyls <- cyl[[1]]
# Fit a linear regression of mpg on wt using four_cyls
lm(four_cyls$mpg ~ four_cyls$wt)##
## Call:
## lm(formula = four_cyls$mpg ~ four_cyls$wt)
##
## Coefficients:
## (Intercept) four_cyls$wt
## 39.571 -5.647
# Rewrite to call an anonymous function
map(cyl, function(df) lm(mpg ~ wt, data = df))## $`4`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`6`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`8`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
# Rewrite to use the formula shortcut instead
map(cyl, ~ lm(mpg ~ wt, data = .))## $`4`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`6`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`8`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
# Save the result from the previous exercise to the variable models
models <- map(cyl, ~ lm(mpg ~ wt, data = .))
# Use map and coef to get the coefficients for each model: coefs
coefs <- map(models, coef)
# Use string shortcut to extract the wt coefficient
map(coefs, "wt")## $`4`
## [1] -5.647025
##
## $`6`
## [1] -2.780106
##
## $`8`
## [1] -2.192438
coefs <- map(models, coef)
# use map_dbl with the numeric shortcut to pull out the second element
map_dbl(coefs, 2)## 4 6 8
## -5.647025 -2.780106 -2.192438
# Define models (don't change)
models <- mtcars %>%
split(mtcars$cyl) %>%
map(~ lm(mpg ~ wt, data = .))
# Rewrite to be a single command using pipes
summaries <- models %>%
map(summary) %>%
map_dbl("r.squared")
summaries## 4 6 8
## 0.5086326 0.4645102 0.4229655
safely() captures the successful result or the error, always returns a listpossible() always succeeds, you give it a default value to return when there is an errorquietly() captures printed output, messages, and warnings instead of capturing errors# Create safe_readLines() by passing readLines() to safely()
safe_readLines <- safely(readLines)
# Call safe_readLines() on "http://example.org"
safe_readLines("http://example.org")## $result
## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 50px;"
## [21] " background-color: #fff;"
## [22] " border-radius: 1em;"
## [23] " }"
## [24] " a:link, a:visited {"
## [25] " color: #38488f;"
## [26] " text-decoration: none;"
## [27] " }"
## [28] " @media (max-width: 700px) {"
## [29] " body {"
## [30] " background-color: #fff;"
## [31] " }"
## [32] " div {"
## [33] " width: auto;"
## [34] " margin: 0 auto;"
## [35] " border-radius: 0;"
## [36] " padding: 1em;"
## [37] " }"
## [38] " }"
## [39] " </style> "
## [40] "</head>"
## [41] ""
## [42] "<body>"
## [43] "<div>"
## [44] " <h1>Example Domain</h1>"
## [45] " <p>This domain is established to be used for illustrative examples in documents. You may use this"
## [46] " domain in examples without prior coordination or asking for permission.</p>"
## [47] " <p><a href=\"http://www.iana.org/domains/example\">More information...</a></p>"
## [48] "</div>"
## [49] "</body>"
## [50] "</html>"
##
## $error
## NULL
# Call safe_readLines() on "http://asdfasdasdkfjlda"
safe_readLines("http://asdfasdasdkfjlda")## $result
## NULL
##
## $error
## <simpleError in file(con, "r"): cannot open the connection to 'http://asdfasdasdkfjlda'>
urls <- structure(list(
example = "http://example.org",
rproj = "http://www.r-project.org",
asdf = "http://asdfasdasdkfjlda"),
.Names = c("example", "rproj", "asdf"))
# Define safe_readLines()
safe_readLines <- safely(readLines)
# Use the safe_readLines() function with map(): html
html <- map(urls, safe_readLines)
# Call str() on html
str(html)## List of 3
## $ example:List of 2
## ..$ result: chr [1:50] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## ..$ error : NULL
## $ rproj :List of 2
## ..$ result: chr [1:120] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## ..$ error : NULL
## $ asdf :List of 2
## ..$ result: NULL
## ..$ error :List of 2
## .. ..$ message: chr "cannot open the connection to 'http://asdfasdasdkfjlda'"
## .. ..$ call : language file(con, "r")
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# Extract the result from one of the successful elements
html[[1]][['result']]## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 50px;"
## [21] " background-color: #fff;"
## [22] " border-radius: 1em;"
## [23] " }"
## [24] " a:link, a:visited {"
## [25] " color: #38488f;"
## [26] " text-decoration: none;"
## [27] " }"
## [28] " @media (max-width: 700px) {"
## [29] " body {"
## [30] " background-color: #fff;"
## [31] " }"
## [32] " div {"
## [33] " width: auto;"
## [34] " margin: 0 auto;"
## [35] " border-radius: 0;"
## [36] " padding: 1em;"
## [37] " }"
## [38] " }"
## [39] " </style> "
## [40] "</head>"
## [41] ""
## [42] "<body>"
## [43] "<div>"
## [44] " <h1>Example Domain</h1>"
## [45] " <p>This domain is established to be used for illustrative examples in documents. You may use this"
## [46] " domain in examples without prior coordination or asking for permission.</p>"
## [47] " <p><a href=\"http://www.iana.org/domains/example\">More information...</a></p>"
## [48] "</div>"
## [49] "</body>"
## [50] "</html>"
# Extract the error from the element that was unsuccessful
html[[3]][['error']]## <simpleError in file(con, "r"): cannot open the connection to 'http://asdfasdasdkfjlda'>
# Define save_readLines() and html
safe_readLines <- safely(readLines)
html <- map(urls, safe_readLines)
# Examine the structure of transpose(html)
str(transpose(html))## List of 2
## $ result:List of 3
## ..$ example: chr [1:50] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## ..$ rproj : chr [1:120] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## ..$ asdf : NULL
## $ error :List of 3
## ..$ example: NULL
## ..$ rproj : NULL
## ..$ asdf :List of 2
## .. ..$ message: chr "cannot open the connection to 'http://asdfasdasdkfjlda'"
## .. ..$ call : language file(con, "r")
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# Extract the results: res
res <- transpose(html)[['result']]
str(res)## List of 3
## $ example: chr [1:50] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## $ rproj : chr [1:120] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## $ asdf : NULL
# Extract the errors: errs
errs <- transpose(html)[['error']]
str(errs)## List of 3
## $ example: NULL
## $ rproj : NULL
## $ asdf :List of 2
## ..$ message: chr "cannot open the connection to 'http://asdfasdasdkfjlda'"
## ..$ call : language file(con, "r")
## ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# Initialize some objects
safe_readLines <- safely(readLines)
html <- map(urls, safe_readLines)
res <- transpose(html)[["result"]]
errs <- transpose(html)[["error"]]
# Create a logical vector is_ok
is_ok <- map_lgl(errs, is_null)
# Extract the successful results
## ha, I just used map to shorten my results with head. : )
map(res[is_ok],head)## $example
## [1] "<!doctype html>" "<html>"
## [3] "<head>" " <title>Example Domain</title>"
## [5] "" " <meta charset=\"utf-8\" />"
##
## $rproj
## [1] "<!DOCTYPE html>"
## [2] "<html lang=\"en\">"
## [3] " <head>"
## [4] " <meta charset=\"utf-8\">"
## [5] " <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">"
## [6] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
# Extract the input from the unsuccessful results
urls[!is_ok]## $asdf
## [1] "http://asdfasdasdkfjlda"
map2() - iterate over two argumentspmap() - iterate over many argumentsinvoke_map() - iterate over funtions and argumentsmap(), each has a whole family of functions:
map2_dpl, map2_lgl, pmap_dbl, etc# Create a list n containing the values: 5, 10, and 20
n <- list(5,10,20)
# Call map() on n with rnorm() to simulate three samples
map(n, rnorm)## [[1]]
## [1] 1.0657847 0.2611918 0.4288288 0.9679625 1.3777859
##
## [[2]]
## [1] -2.247033778 0.181748986 -0.095428614 0.121105226 -1.004988324
## [6] -0.002616321 -0.999346813 -1.743157358 2.757152773 0.835123211
##
## [[3]]
## [1] -0.50522969 -0.01643227 -0.06806224 -0.01626969 1.91578173
## [6] -1.48767692 1.46299184 0.86248404 0.96487125 1.04576348
## [11] 2.42947709 0.02577124 -1.64642666 -0.20262075 -0.71950170
## [16] 0.02241728 -0.95241311 -0.66993694 -0.60649940 -0.47080789
# Initialize n
n <- list(5, 10, 20)
# Create a list mu containing the values: 1, 5, and 10
mu <- list(1, 5, 10)
# Edit to call map2() on n and mu with rnorm() to simulate three samples
map2(n, mu, rnorm)## [[1]]
## [1] 0.1807151 0.7468013 2.5763749 3.0714945 1.6923841
##
## [[2]]
## [1] 5.645818 5.538453 5.381641 5.690021 5.779623 4.538251 4.552508
## [8] 7.720836 4.662588 4.486540
##
## [[3]]
## [1] 10.818744 10.546941 8.862296 10.882146 9.620230 11.220665 10.558551
## [8] 10.263423 9.584339 9.655919 9.645609 9.677518 10.714020 10.041883
## [15] 9.475666 9.054924 12.155162 11.518584 10.877205 10.252257
# Initialize n and mu
n <- list(5, 10, 20)
mu <- list(1, 5, 10)
# Create a sd list with the values: 0.1, 1 and 0.1
sd <- list(0.1, 1, 0.1)
# Edit this call to pmap() to iterate over the sd list as well
pmap(list(n, mu, sd), rnorm)## [[1]]
## [1] 1.2011004 1.0075495 1.0085773 0.7258698 1.0563910
##
## [[2]]
## [1] 5.434895 4.982361 5.086886 4.255512 4.432169 4.165156 5.010052
## [8] 5.790142 3.712141 5.651378
##
## [[3]]
## [1] 10.068812 10.102294 10.325421 10.036432 10.055530 10.014518 10.120995
## [8] 10.099489 10.146940 10.030272 10.014673 9.947211 10.006676 9.929278
## [15] 10.075016 10.075523 10.139564 10.044916 9.864735 9.973932
# Name the elements of the argument list
pmap(list(mean = mu, n = n, sd = sd), rnorm)## [[1]]
## [1] 0.8671455 1.0588357 1.0996202 1.1309776 0.9051729
##
## [[2]]
## [1] 3.665373 5.716052 3.416747 4.387419 4.731132 6.493710 5.640473
## [8] 6.227682 5.431491 4.361669
##
## [[3]]
## [1] 10.092104 10.034538 9.924304 10.067202 9.910141 10.009064 10.036776
## [8] 10.037023 9.918573 9.999523 9.877229 9.999799 10.040469 9.971336
## [15] 10.126249 9.978914 9.986848 9.852564 9.924584 9.912282
# Define list of functions
f <- list("rnorm", "runif", "rexp")
# Parameter list for rnorm()
rnorm_params <- list(mean = 10)
# Add a min element with value 0 and max element with value 5
runif_params <- list(min = 0, max = 5)
# Add a rate element with value 5
rexp_params <- list(rate = 5)
# Define params for each function
params <- list(
rnorm_params,
runif_params,
rexp_params
)
# Call invoke_map() on f supplying params as the second argument
invoke_map(f, params, n = 5)## [[1]]
## [1] 11.707807 9.917399 9.174763 10.229222 10.838103
##
## [[2]]
## [1] 2.819317 4.033093 4.082667 1.354417 0.736542
##
## [[3]]
## [1] 0.19971120 0.03046661 0.03359189 0.19867997 0.28921578
walk() works just like map(), but is designed for functions called for their side effects# Define list of functions
f <- list(Normal = "rnorm", Uniform = "runif", Exp = "rexp")
# Define params
params <- list(
Normal = list(mean = 10),
Uniform = list(min = 0, max = 5),
Exp = list(rate = 5)
)
# Assign the simulated samples to sims
sims <- invoke_map(f, params, n = 50)
# Use walk() to make a histogram of each element in sims
walk(sims, hist)# Replace "Sturges" with reasonable breaks for each sample
breaks_list <- list(
Normal = seq(6, 16, 0.5),
Uniform = seq(0, 5, 0.25),
Exp = seq(0, 1.5, 0.1)
)
# Use walk2() to make histograms with the right breaks
walk2(sims, breaks_list, hist)# Turn this snippet into find_breaks()
find_breaks <- function(x) {
rng <- range(x, na.rm = TRUE)
seq(rng[1], rng[2], length.out = 30)
}
# Call find_breaks() on sims[[1]]
find_breaks(sims[[1]])## [1] 7.613127 7.746538 7.879950 8.013362 8.146773 8.280185 8.413597
## [8] 8.547008 8.680420 8.813832 8.947243 9.080655 9.214067 9.347478
## [15] 9.480890 9.614301 9.747713 9.881125 10.014536 10.147948 10.281360
## [22] 10.414771 10.548183 10.681595 10.815006 10.948418 11.081830 11.215241
## [29] 11.348653 11.482065
# Use map() to iterate find_breaks() over sims: nice_breaks
nice_breaks <- map(sims, find_breaks)
# Use nice_breaks as the second argument to walk2()
walk2(sims, nice_breaks, hist)# Increase sample size to 1000
sims <- invoke_map(f, params, n = 1000)
# Compute nice_breaks (don't change this)
nice_breaks <- map(sims, find_breaks)
# Create a vector nice_titles
nice_titles <- c(
"Normal(10, 1)",
"Uniform(0, 5)",
"Exp(5)"
)
# Use pwalk() instead of walk2()
pwalk(list(x = sims, breaks = nice_breaks, main = nice_titles), hist, xlab = "")# Pipe this along to map(), using summary() as .f
sims %>%
walk(hist) %>%
map(summary)## $Normal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.081 9.293 9.951 9.973 10.649 12.983
##
## $Uniform
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00542 1.11491 2.45483 2.48446 3.85500 4.99769
##
## $Exp
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000448 0.0556245 0.1380562 0.1970419 0.2721849 1.9512666
df[, vars]subset(df, x == y)data.frame(x = "a")# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Add stopifnot() to check length of x and y
stopifnot(length(x) == length(y))
sum(is.na(x) & is.na(y))
}
# Call both_na() on x and y
both_na(x, y)## Error: length(x) == length(y) is not TRUE
# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Replace condition with logical
if (length(x) != length(y)) {
# Replace "Error" with better message
stop("x and y must have the same length", call. = FALSE)
}
sum(is.na(x) & is.na(y))
}
# Call both_na()
both_na(x, y)## Error: x and y must have the same length
[ is a common source of surprises
drop = FALSE: df[x, , drop = FALSE]df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
A <- sapply(df[1:4], class)
A## $a
## [1] "integer"
##
## $b
## [1] "numeric"
##
## $y
## [1] "POSIXct" "POSIXt"
##
## $z
## [1] "ordered" "factor"
B <- sapply(df[3:4], class)
B## y z
## [1,] "POSIXct" "ordered"
## [2,] "POSIXt" "factor"
# sapply calls
A <- sapply(df[1:4], class)
B <- sapply(df[3:4], class)
C <- sapply(df[1:2], class)
# Demonstrate type inconsistency
str(A)## List of 4
## $ a: chr "integer"
## $ b: chr "numeric"
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
str(B)## chr [1:2, 1:2] "POSIXct" "POSIXt" "ordered" "factor"
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "y" "z"
str(C)## Named chr [1:2] "integer" "numeric"
## - attr(*, "names")= chr [1:2] "a" "b"
# Use map() to define X, Y and Z
X <- map(df[1:4], class)
Y <- map(df[3:4], class)
Z <- map(df[1:2], class)
# Use str() to check type consistency
str(X)## List of 4
## $ a: chr "integer"
## $ b: chr "numeric"
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
str(Y)## List of 2
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
str(Z)## List of 2
## $ a: chr "integer"
## $ b: chr "numeric"
col_classes <- function(df) {
# Assign list output to class_list
class_list <- map(df, class)
# Use map_chr() to extract first element in class_list
map_chr(class_list, 1)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()## Named chr [1:4] "integer" "numeric" "POSIXct" "ordered"
## - attr(*, "names")= chr [1:4] "a" "b" "y" "z"
df[3:4] %>% col_classes() %>% str()## Named chr [1:2] "POSIXct" "ordered"
## - attr(*, "names")= chr [1:2] "y" "z"
df[1:2] %>% col_classes() %>% str()## Named chr [1:2] "integer" "numeric"
## - attr(*, "names")= chr [1:2] "a" "b"
## or make our own error message
col_classes <- function(df) {
class_list <- map(df, class)
# Add a check that no element of class_list has length > 1
if (any(map_dbl(class_list, length) > 1)) {
stop("Some columns have more than one class", call. = FALSE)
}
# Use flatten_chr() to return a character vector
flatten_chr(class_list)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()## Error: Some columns have more than one class
df[3:4] %>% col_classes() %>% str()## Error: Some columns have more than one class
df[1:2] %>% col_classes() %>% str()## chr [1:2] "integer" "numeric"
big_x <- function(df, threshold) {
dplyr::filter(df, x > threshold)
}
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 2 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 1.35 Ideal G VS1 60.9 54 10471 7.18 7.15 4.36
## 2 1.63 Ideal F I1 62.0 55 7229 7.57 7.50 4.68
# Remove the x column from diamonds
diamonds_sub$x <- NULL
# Create variable x with value 1
x <- 1
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 0 x 9
## # ... with 9 variables: carat <dbl>, cut <ord>, color <ord>,
## # clarity <ord>, depth <dbl>, table <dbl>, price <int>, y <dbl>, z <dbl>
# Create a threshold column with value 100
diamonds_sub$threshold <- 100
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 0 x 10
## # ... with 10 variables: carat <dbl>, cut <ord>, color <ord>,
## # clarity <ord>, depth <dbl>, table <dbl>, price <int>, y <dbl>,
## # z <dbl>, threshold <dbl>
big_x <- function(df, threshold) {
# Write a check for x not being in df
if (!'x' %in% names(df)) {
stop("df must contain variable called x", call. = FALSE)
}
# Write a check for threshold being in df
if ('threshold' %in% names(df)) {
stop("df must not contain variable called threshold", call. = FALSE)
}
dplyr::filter(df, x > threshold)
}
big_x(diamonds_sub, 7)## Error: df must contain variable called x
# Fit a regression model
fit <- lm(mpg ~ wt, data = mtcars)
# Look at the summary of the model
summary(fit)##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# Set the global digits option to 2
options(digits = 2)
# Take another look at the summary
summary(fit)##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.543 -2.365 -0.125 1.410 6.873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.285 1.878 19.86 < 2e-16 ***
## wt -5.344 0.559 -9.56 1.3e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3 on 30 degrees of freedom
## Multiple R-squared: 0.753, Adjusted R-squared: 0.745
## F-statistic: 91.4 on 1 and 30 DF, p-value: 1.29e-10