return() expression# Define ratio() function
ratio <- function(x, y) {
x / y
}
# Call ratio() with arguments 3 and 4
ratio(3,4)## [1] 0.75
## [1] 5
NULL often used to indicate the absence of a vectorNA used to indicate the absence of a value in a vector, aka a missing valuesum on a vector with NA will result in NA.[, [[, or $
[ extracts a sublist[[ and $ extract elements, remove a level of hierarchy## [1] "logical"
## [1] "list"
## [1] "character"
## [1] "nums" "y" "x" "model"
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
## List of 12
## $ coefficients : Named num [1:2] 37.29 -5.34
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## $ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.18 1.05
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 30
## $ xlevels : Named list()
## $ call :List of 12
## ..$ coefficients : Named num [1:2] 37.29 -5.34
## .. ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## ..$ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## .. ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## ..$ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## .. ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## ..$ rank : int 2
## ..$ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## .. ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## ..$ assign : int [1:2] 0 1
## ..$ qr :List of 5
## .. ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. .. ..- attr(*, "assign")= int [1:2] 0 1
## .. ..$ qraux: num [1:2] 1.18 1.05
## .. ..$ pivot: int [1:2] 1 2
## .. ..$ tol : num 1e-07
## .. ..$ rank : int 2
## .. ..- attr(*, "class")= chr "qr"
## ..$ df.residual : int 30
## ..$ xlevels : Named list()
## ..$ call : language lm(formula = mpg ~ wt, data = mtcars)
## ..$ terms :Classes 'terms', 'formula' language mpg ~ wt
## .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. ..$ : chr "wt"
## .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## ..$ model :'data.frame': 32 obs. of 2 variables:
## .. ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## .. ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## .. ..- attr(*, "terms")=Classes 'terms', 'formula' language mpg ~ wt
## .. .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. .. ..$ : chr "wt"
## .. .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. .. ..- attr(*, "order")= int 1
## .. .. .. ..- attr(*, "intercept")= int 1
## .. .. .. ..- attr(*, "response")= int 1
## .. .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## ..- attr(*, "class")= chr "lm"
## $ terms :Class 'formula' language mpg ~ wt
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## $ model :'data.frame': 32 obs. of 2 variables:
## ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..- attr(*, "terms")=Class 'formula' language mpg ~ wt
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "class")= chr "lm"
## (Intercept) wt
## 37.285126 -5.344472
## [1] -5.344472
seq_along()## [1] 0.7389042
## [1] -1.098235
## [1] -0.0626907
## [1] 0.4441366
# Create new double vector: output
output <- vector("double", ncol(df1))
# Alter the loop
for (i in seq_along(df1)) {
# Change code to store result in output
output[i] <- median(df1[[i]])
}
# Print output
output## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Define example vector x
x <- 1:10
# Rewrite this snippet to refer to x
(x - min(x, na.rm = TRUE)) /
(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vector x
x <- 1:10
# Define rng
rng <- range(x, na.rm = T)
# Rewrite this snippet to refer to the elements of rng
(x - rng[1]) /
(rng[2] - rng[1])## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vector x
x <- 1:10
# Use the function template to create the rescale01 function
rescale01 <- function(x) {
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
}
# Test your function, call rescale01 using the vector x as the argument
rescale01(x)## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000
# Define example vectors x and y
x <- c( 1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# Count how many elements are missing in both x and y
sum(is.na(x) & is.na(y))## [1] 1
# Define x, y1 and y2
x <- c(NA, NA, NA)
y1 <- c( 1, NA, NA)
y2 <- c( 1, NA, NA, NA)
# Call both_na on x, y1
both_na(x, y1)## [1] 2
## [1] 3
c, mean )impute_mising, collapse_yearsremove_lastc is already they name of a functionnums is non conventional# Alter the mean_ci function
# Edit the mean_ci function using an if statement to check for the case when x is empty and if so, to produce the same warning as the code above then immediately return() c(-Inf, Inf).
mean_ci <- function(x, level = 0.95) {
if (length(x) == 0) {
warning("x was empty", call. = FALSE)
return(c(-Inf, Inf))
}
se <- sd(x) / sqrt(length(x))
alpha <- 1 - level
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}
mean_ci(numeric(0))## [1] -Inf Inf
f <- function(x, y) {
x[is.na(x)] <- y
cat(sum(is.na(x)), y, "\n")
x
}
# Define a numeric vector x with the values 1, 2, NA, 4 and 5
x <- c(1, 2, NA, 4, 5)
# Call f() with the arguments x = x and y = 3
f(x = x, y = 3)## 0 3
## [1] 1 2 3 4 5
## 0 10
## [1] 1 2 10 4 5
NAs in x with the value given as y# Rename the function f() to replace_missings()
replace_missings <- function(x, replacement) {
# Change the name of the y argument to replacement
x[is.na(x)] <- replacement
cat(sum(is.na(x)), replacement, "\n")
x
}
# Rewrite the call on df$z to match our new names
df <- structure(list(z = c(0.473582631011786, -0.371809943354702, -0.88562980458499,
-0.768223158006804, 0.886983968322944, 0.238444716245814, -1.13439205742083,
NA, NA, -1.27240097583594)), .Names = "z", row.names = c(NA,
-10L), class = "data.frame")
df$z <- replace_missings(df$z, 0)## 0 0
## [1] 0.4735826 -0.3718099 -0.8856298 -0.7682232 0.8869840 0.2384447
## [7] -1.1343921 0.0000000 0.0000000 -1.2724010
replace_missings <- function(x, replacement) {
is_miss <- is.na(x)
x[is_miss] <- replacement
# Rewrite to use message()
message(sum(is_miss), " missings replaced by the value ", replacement)
x
}
# Check your new function by running on df$z
replace_missings(df$z, 0)## [1] 0.4735826 -0.3718099 -0.8856298 -0.7682232 0.8869840 0.2384447
## [7] -1.1343921 0.0000000 0.0000000 -1.2724010
# Initialize output vector
output <- vector("double", ncol(df1))
# Fill in the body of the for loop
for (i in seq_along(df1)) {
output[i] <- median(df1[[i]])
}
# View the result
output## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Turn this code into col_median()
output <- vector("double", ncol(df))
col_median <- function(df) {
for (i in seq_along(df)) {
output[[i]] <- median(df[[i]])
}
output
}
col_median(df1)## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
# Create col_mean() function to find column means
col_mean <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[[i]] <- mean(df[[i]])
}
output
}
col_mean(df1)## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
# Define col_sd() function
col_sd <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[[i]] <- sd(df[[i]])
}
output
}
col_sd(df1)## [1] 1.2302953 0.9066193 0.7034230 1.2889431
col_summary <- function(df, fun) {
output <- vector("numeric", length(df))
for (i in seq_along(df)) {
output[[i]] <- fun(df[[i]])
}
output
}
# Find the column medians using col_median() and col_summary()
col_median(df1)## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
## [1] 0.7389042 -1.0982346 -0.0626907 0.4441366
## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
## [1] 0.21356218 -0.95626180 -0.06994885 0.51866611
## [1] 2.0399632 0.9621661 0.5023385 1.8777572
.x.f to each elementmap returns a listmap_dbl returns a vector of doublesmap_lgl returns a vector of logicalsmap_int same for integersmap_chr same for characters.f## z
## 1 0.4735826
## 2 -0.3718099
## 3 -0.8856298
## 4 -0.7682232
## 5 0.8869840
## 6 0.2384447
## z
## -0.2833445
## z
## -0.185905
## z
## 0.7213903
## year engines seats speed
## 1 1956 4 102 232
## 2 1975 1 4 108
## 3 1977 2 139 432
## 4 1996 2 142 NA
## 5 2010 2 20 NA
## 6 NA 1 2 NA
## year engines seats speed
## NA 2.00000 68.16667 NA
## year engines seats speed
## 1982.80000 2.00000 68.16667 257.33333
# Find the 5th percentile of each column, excluding missing values
map_dbl(planes, quantile, probs = .05, na.rm = T)## year engines seats speed
## 1959.8 1.0 2.5 120.4
## A B C D
## TRUE FALSE TRUE TRUE
## A B C D
## "double" "character" "integer" "double"
## $A
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4039 -0.1372 0.1694 0.1854 0.5858 1.4211
##
## $B
## Length Class Mode
## 10 character character
##
## $C
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 5.50 5.50 7.75 10.00
##
## $D
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.4324 -0.6868 -0.2348 -0.1268 0.6237 1.2318
## $a
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.7702 -0.9379 0.7389 0.2136 1.1021 1.4268
##
## $b
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1469 -1.5504 -1.0982 -0.9563 -0.5882 0.8299
##
## $c
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.20476 -0.33035 -0.06269 -0.06995 0.17199 1.22003
##
## $d
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.3099 -0.4489 0.4441 0.5187 1.4289 2.7267
## $a
## [1] 0.9763800 0.8193699 0.9247780 0.6621110 1.0000000 0.1264128 0.0000000
## [8] 0.1263912 0.8072770 0.7623936
##
## $b
## [1] 1.0000000 0.3553432 0.3492247 0.1752596 0.5239921 0.5224524 0.7258844
## [8] 0.0718125 0.0000000 0.2757911
##
## $c
## [1] 0.4703795 0.0000000 0.5982757 0.3647366 0.4716139 0.7982322 0.4762885
## [8] 0.1412718 1.0000000 0.3592357
##
## $d
## [1] 0.2366551 0.2055043 0.6105489 1.0000000 0.7836893 0.1233262 0.7011169
## [8] 0.4286287 0.0000000 0.4404229
## $a
## [1] 0
##
## $b
## [1] 0
##
## $c
## [1] 0
##
## $d
## [1] 0
## $a
## [1] 0
##
## $b
## [1] 0
##
## $c
## [1] 0
##
## $d
## [1] 0
.f is [[list_of_results <- list(
list(a = 1, b = "A"),
list(a = 2, b = "C"),
list(a = 3, b = "D")
)
map_dbl(list_of_results, function(x) x[["a"]])## [1] 1 2 3
## [1] 1 2 3
## [1] 1 2 3
## List of 3
## $ 4:'data.frame': 11 obs. of 11 variables:
## ..$ mpg : num [1:11] 22.8 24.4 22.8 32.4 30.4 33.9 21.5 27.3 26 30.4 ...
## ..$ cyl : num [1:11] 4 4 4 4 4 4 4 4 4 4 ...
## ..$ disp: num [1:11] 108 146.7 140.8 78.7 75.7 ...
## ..$ hp : num [1:11] 93 62 95 66 52 65 97 66 91 113 ...
## ..$ drat: num [1:11] 3.85 3.69 3.92 4.08 4.93 4.22 3.7 4.08 4.43 3.77 ...
## ..$ wt : num [1:11] 2.32 3.19 3.15 2.2 1.61 ...
## ..$ qsec: num [1:11] 18.6 20 22.9 19.5 18.5 ...
## ..$ vs : num [1:11] 1 1 1 1 1 1 1 1 0 1 ...
## ..$ am : num [1:11] 1 0 0 1 1 1 0 1 1 1 ...
## ..$ gear: num [1:11] 4 4 4 4 4 4 3 4 5 5 ...
## ..$ carb: num [1:11] 1 2 2 1 2 1 1 1 2 2 ...
## $ 6:'data.frame': 7 obs. of 11 variables:
## ..$ mpg : num [1:7] 21 21 21.4 18.1 19.2 17.8 19.7
## ..$ cyl : num [1:7] 6 6 6 6 6 6 6
## ..$ disp: num [1:7] 160 160 258 225 168 ...
## ..$ hp : num [1:7] 110 110 110 105 123 123 175
## ..$ drat: num [1:7] 3.9 3.9 3.08 2.76 3.92 3.92 3.62
## ..$ wt : num [1:7] 2.62 2.88 3.21 3.46 3.44 ...
## ..$ qsec: num [1:7] 16.5 17 19.4 20.2 18.3 ...
## ..$ vs : num [1:7] 0 0 1 1 1 1 0
## ..$ am : num [1:7] 1 1 0 0 0 0 1
## ..$ gear: num [1:7] 4 4 3 3 4 4 5
## ..$ carb: num [1:7] 4 4 1 1 4 4 6
## $ 8:'data.frame': 14 obs. of 11 variables:
## ..$ mpg : num [1:14] 18.7 14.3 16.4 17.3 15.2 10.4 10.4 14.7 15.5 15.2 ...
## ..$ cyl : num [1:14] 8 8 8 8 8 8 8 8 8 8 ...
## ..$ disp: num [1:14] 360 360 276 276 276 ...
## ..$ hp : num [1:14] 175 245 180 180 180 205 215 230 150 150 ...
## ..$ drat: num [1:14] 3.15 3.21 3.07 3.07 3.07 2.93 3 3.23 2.76 3.15 ...
## ..$ wt : num [1:14] 3.44 3.57 4.07 3.73 3.78 ...
## ..$ qsec: num [1:14] 17 15.8 17.4 17.6 18 ...
## ..$ vs : num [1:14] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ am : num [1:14] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ gear: num [1:14] 3 3 3 3 3 3 3 3 3 3 ...
## ..$ carb: num [1:14] 2 4 3 3 3 4 4 4 2 2 ...
# Extract the first element into four_cyls
four_cyls <- cyl[[1]]
# Fit a linear regression of mpg on wt using four_cyls
lm(four_cyls$mpg ~ four_cyls$wt)##
## Call:
## lm(formula = four_cyls$mpg ~ four_cyls$wt)
##
## Coefficients:
## (Intercept) four_cyls$wt
## 39.571 -5.647
## $`4`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`6`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`8`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
## $`4`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`6`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`8`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
# Save the result from the previous exercise to the variable models
models <- map(cyl, ~ lm(mpg ~ wt, data = .))
# Use map and coef to get the coefficients for each model: coefs
coefs <- map(models, coef)
# Use string shortcut to extract the wt coefficient
map(coefs, "wt")## $`4`
## [1] -5.647025
##
## $`6`
## [1] -2.780106
##
## $`8`
## [1] -2.192438
coefs <- map(models, coef)
# use map_dbl with the numeric shortcut to pull out the second element
map_dbl(coefs, 2)## 4 6 8
## -5.647025 -2.780106 -2.192438
# Define models (don't change)
models <- mtcars %>%
split(mtcars$cyl) %>%
map(~ lm(mpg ~ wt, data = .))
# Rewrite to be a single command using pipes
summaries <- models %>%
map(summary) %>%
map_dbl("r.squared")
summaries## 4 6 8
## 0.5086326 0.4645102 0.4229655
safely() captures the successful result or the error, always returns a listpossible() always succeeds, you give it a default value to return when there is an errorquietly() captures printed output, messages, and warnings instead of capturing errors# Create safe_readLines() by passing readLines() to safely()
safe_readLines <- safely(readLines)
# Call safe_readLines() on "http://example.org"
safe_readLines("http://example.org")## $result
## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: -apple-system, system-ui, BlinkMacSystemFont, \"Segoe UI\", \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 2em;"
## [21] " background-color: #fdfdff;"
## [22] " border-radius: 0.5em;"
## [23] " box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);"
## [24] " }"
## [25] " a:link, a:visited {"
## [26] " color: #38488f;"
## [27] " text-decoration: none;"
## [28] " }"
## [29] " @media (max-width: 700px) {"
## [30] " div {"
## [31] " margin: 0 auto;"
## [32] " width: auto;"
## [33] " }"
## [34] " }"
## [35] " </style> "
## [36] "</head>"
## [37] ""
## [38] "<body>"
## [39] "<div>"
## [40] " <h1>Example Domain</h1>"
## [41] " <p>This domain is for use in illustrative examples in documents. You may use this"
## [42] " domain in literature without prior coordination or asking for permission.</p>"
## [43] " <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>"
## [44] "</div>"
## [45] "</body>"
## [46] "</html>"
##
## $error
## NULL
## $result
## NULL
##
## $error
## <simpleError in file(con, "r"): cannot open the connection>
urls <- structure(list(
example = "http://example.org",
rproj = "http://www.r-project.org",
asdf = "http://asdfasdasdkfjlda"),
.Names = c("example", "rproj", "asdf"))
# Define safe_readLines()
safe_readLines <- safely(readLines)
# Use the safe_readLines() function with map(): html
html <- map(urls, safe_readLines)
# Call str() on html
str(html)## List of 3
## $ example:List of 2
## ..$ result: chr [1:46] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## ..$ error : NULL
## $ rproj :List of 2
## ..$ result: chr [1:125] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## ..$ error : NULL
## $ asdf :List of 2
## ..$ result: NULL
## ..$ error :List of 2
## .. ..$ message: chr "cannot open the connection"
## .. ..$ call : language file(con, "r")
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: -apple-system, system-ui, BlinkMacSystemFont, \"Segoe UI\", \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 2em;"
## [21] " background-color: #fdfdff;"
## [22] " border-radius: 0.5em;"
## [23] " box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);"
## [24] " }"
## [25] " a:link, a:visited {"
## [26] " color: #38488f;"
## [27] " text-decoration: none;"
## [28] " }"
## [29] " @media (max-width: 700px) {"
## [30] " div {"
## [31] " margin: 0 auto;"
## [32] " width: auto;"
## [33] " }"
## [34] " }"
## [35] " </style> "
## [36] "</head>"
## [37] ""
## [38] "<body>"
## [39] "<div>"
## [40] " <h1>Example Domain</h1>"
## [41] " <p>This domain is for use in illustrative examples in documents. You may use this"
## [42] " domain in literature without prior coordination or asking for permission.</p>"
## [43] " <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>"
## [44] "</div>"
## [45] "</body>"
## [46] "</html>"
## <simpleError in file(con, "r"): cannot open the connection>
# Define save_readLines() and html
safe_readLines <- safely(readLines)
html <- map(urls, safe_readLines)
# Examine the structure of transpose(html)
str(transpose(html))## List of 2
## $ result:List of 3
## ..$ example: chr [1:46] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## ..$ rproj : chr [1:125] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## ..$ asdf : NULL
## $ error :List of 3
## ..$ example: NULL
## ..$ rproj : NULL
## ..$ asdf :List of 2
## .. ..$ message: chr "cannot open the connection"
## .. ..$ call : language file(con, "r")
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
## List of 3
## $ example: chr [1:46] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## $ rproj : chr [1:125] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## $ asdf : NULL
## List of 3
## $ example: NULL
## $ rproj : NULL
## $ asdf :List of 2
## ..$ message: chr "cannot open the connection"
## ..$ call : language file(con, "r")
## ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# Initialize some objects
safe_readLines <- safely(readLines)
html <- map(urls, safe_readLines)
res <- transpose(html)[["result"]]
errs <- transpose(html)[["error"]]
# Create a logical vector is_ok
is_ok <- map_lgl(errs, is_null)
# Extract the successful results
## ha, I just used map to shorten my results with head. : )
map(res[is_ok],head)## $example
## [1] "<!doctype html>" "<html>"
## [3] "<head>" " <title>Example Domain</title>"
## [5] "" " <meta charset=\"utf-8\" />"
##
## $rproj
## [1] "<!DOCTYPE html>"
## [2] "<html lang=\"en\">"
## [3] " <head>"
## [4] " <meta charset=\"utf-8\">"
## [5] " <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">"
## [6] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
## $asdf
## [1] "http://asdfasdasdkfjlda"
map2() - iterate over two argumentspmap() - iterate over many argumentsinvoke_map() - iterate over funtions and argumentsmap(), each has a whole family of functions:
map2_dpl, map2_lgl, pmap_dbl, etc# Create a list n containing the values: 5, 10, and 20
n <- list(5,10,20)
# Call map() on n with rnorm() to simulate three samples
map(n, rnorm)## [[1]]
## [1] -1.30136644 -0.26622681 -0.08317222 -1.08431619 -0.18742912
##
## [[2]]
## [1] -1.06304413 0.89222105 -0.30107376 1.25503563 1.34643393 -1.42228709
## [7] 0.10864957 -0.17363277 -0.22883322 0.04176509
##
## [[3]]
## [1] -1.54412049 2.04116295 -0.44066170 -0.20295451 -0.40985085 1.80484259
## [7] 1.87628246 0.30127762 0.61515891 1.26723190 -2.29276827 0.04851154
## [13] 1.47980920 -0.17798674 -1.11423571 -0.10788022 0.04255771 -0.52582469
## [19] 0.42746087 -1.42114043
# Initialize n
n <- list(5, 10, 20)
# Create a list mu containing the values: 1, 5, and 10
mu <- list(1, 5, 10)
# Edit to call map2() on n and mu with rnorm() to simulate three samples
map2(n, mu, rnorm)## [[1]]
## [1] 1.2583382 1.4043842 1.8427311 0.4561319 1.5641303
##
## [[2]]
## [1] 5.728895 4.780263 4.959692 5.814853 4.765957 4.226567 5.164756 6.299060
## [9] 5.154783 4.065903
##
## [[3]]
## [1] 10.361171 11.354237 7.865869 11.204416 11.491666 9.602063 10.489980
## [8] 8.623029 10.624383 10.934074 8.205274 9.267494 8.937338 9.672072
## [15] 9.440029 11.599572 7.577798 9.763647 10.763069 8.140366
# Initialize n and mu
n <- list(5, 10, 20)
mu <- list(1, 5, 10)
# Create a sd list with the values: 0.1, 1 and 0.1
sd <- list(0.1, 1, 0.1)
# Edit this call to pmap() to iterate over the sd list as well
pmap(list(n, mu, sd), rnorm)## [[1]]
## [1] 0.8439245 1.0264659 1.0091719 0.9575275 0.9970034
##
## [[2]]
## [1] 4.866385 3.038711 6.019981 4.848212 4.066897 5.240455 5.582616 4.983957
## [9] 4.800224 4.908284
##
## [[3]]
## [1] 9.890796 10.013686 10.075698 9.967273 9.961457 9.892728 10.024156
## [8] 10.058355 9.980263 9.848726 9.960526 9.988791 9.966049 9.880938
## [15] 9.938063 10.062316 10.101006 10.015639 10.040616 10.033779
## [[1]]
## [1] 0.9934738 1.0339686 0.8642611 1.1172105 1.0014440
##
## [[2]]
## [1] 4.493149 4.138373 4.441208 2.418655 3.876626 3.073693 4.419981 5.380088
## [9] 4.979955 6.741342
##
## [[3]]
## [1] 10.267840 10.050915 9.935477 10.139084 10.104342 10.053301 9.838134
## [8] 9.999052 9.919862 9.876453 9.905110 10.009046 9.916691 10.065494
## [15] 10.061442 9.948763 10.220314 9.995745 10.044660 10.052457
# Define list of functions
f <- list("rnorm", "runif", "rexp")
# Parameter list for rnorm()
rnorm_params <- list(mean = 10)
# Add a min element with value 0 and max element with value 5
runif_params <- list(min = 0, max = 5)
# Add a rate element with value 5
rexp_params <- list(rate = 5)
# Define params for each function
params <- list(
rnorm_params,
runif_params,
rexp_params
)
# Call invoke_map() on f supplying params as the second argument
invoke_map(f, params, n = 5)## [[1]]
## [1] 7.637301 8.885525 9.657942 9.472617 10.931148
##
## [[2]]
## [1] 0.4273974 0.4705302 4.4917094 3.9219005 1.4926565
##
## [[3]]
## [1] 0.008750712 0.041825338 0.073940083 0.284244685 0.028451292
walk() works just like map(), but is designed for functions called for their side effects# Define list of functions
f <- list(Normal = "rnorm", Uniform = "runif", Exp = "rexp")
# Define params
params <- list(
Normal = list(mean = 10),
Uniform = list(min = 0, max = 5),
Exp = list(rate = 5)
)
# Assign the simulated samples to sims
sims <- invoke_map(f, params, n = 50)
# Use walk() to make a histogram of each element in sims
walk(sims, hist)# Replace "Sturges" with reasonable breaks for each sample
breaks_list <- list(
Normal = seq(6, 16, 0.5),
Uniform = seq(0, 5, 0.25),
Exp = seq(0, 1.5, 0.1)
)
# Use walk2() to make histograms with the right breaks
walk2(sims, breaks_list, hist)# Turn this snippet into find_breaks()
find_breaks <- function(x) {
rng <- range(x, na.rm = TRUE)
seq(rng[1], rng[2], length.out = 30)
}
# Call find_breaks() on sims[[1]]
find_breaks(sims[[1]])## [1] 7.170137 7.362012 7.553887 7.745761 7.937636 8.129511 8.321385
## [8] 8.513260 8.705135 8.897009 9.088884 9.280759 9.472633 9.664508
## [15] 9.856383 10.048257 10.240132 10.432007 10.623882 10.815756 11.007631
## [22] 11.199506 11.391380 11.583255 11.775130 11.967004 12.158879 12.350754
## [29] 12.542628 12.734503
# Use map() to iterate find_breaks() over sims: nice_breaks
nice_breaks <- map(sims, find_breaks)
# Use nice_breaks as the second argument to walk2()
walk2(sims, nice_breaks, hist)# Increase sample size to 1000
sims <- invoke_map(f, params, n = 1000)
# Compute nice_breaks (don't change this)
nice_breaks <- map(sims, find_breaks)
# Create a vector nice_titles
nice_titles <- c(
"Normal(10, 1)",
"Uniform(0, 5)",
"Exp(5)"
)
# Use pwalk() instead of walk2()
pwalk(list(x = sims, breaks = nice_breaks, main = nice_titles), hist, xlab = "")## $Normal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.854 9.290 9.961 9.949 10.595 12.936
##
## $Uniform
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01544 1.27345 2.52490 2.49769 3.74660 4.99279
##
## $Exp
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000245 0.051978 0.130253 0.195124 0.270168 1.792442
df[, vars]subset(df, x == y)data.frame(x = "a")# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Add stopifnot() to check length of x and y
stopifnot(length(x) == length(y))
sum(is.na(x) & is.na(y))
}
# Call both_na() on x and y
both_na(x, y)## Error in both_na(x, y): length(x) == length(y) is not TRUE
# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Replace condition with logical
if (length(x) != length(y)) {
# Replace "Error" with better message
stop("x and y must have the same length", call. = FALSE)
}
sum(is.na(x) & is.na(y))
}
# Call both_na()
both_na(x, y)## Error: x and y must have the same length
[ is a common source of surprises
drop = FALSE: df[x, , drop = FALSE]## $a
## [1] "integer"
##
## $b
## [1] "numeric"
##
## $y
## [1] "POSIXct" "POSIXt"
##
## $z
## [1] "ordered" "factor"
## y z
## [1,] "POSIXct" "ordered"
## [2,] "POSIXt" "factor"
# sapply calls
A <- sapply(df[1:4], class)
B <- sapply(df[3:4], class)
C <- sapply(df[1:2], class)
# Demonstrate type inconsistency
str(A)## List of 4
## $ a: chr "integer"
## $ b: chr "numeric"
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
## chr [1:2, 1:2] "POSIXct" "POSIXt" "ordered" "factor"
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "y" "z"
## Named chr [1:2] "integer" "numeric"
## - attr(*, "names")= chr [1:2] "a" "b"
# Use map() to define X, Y and Z
X <- map(df[1:4], class)
Y <- map(df[3:4], class)
Z <- map(df[1:2], class)
# Use str() to check type consistency
str(X)## List of 4
## $ a: chr "integer"
## $ b: chr "numeric"
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
## List of 2
## $ y: chr [1:2] "POSIXct" "POSIXt"
## $ z: chr [1:2] "ordered" "factor"
## List of 2
## $ a: chr "integer"
## $ b: chr "numeric"
col_classes <- function(df) {
# Assign list output to class_list
class_list <- map(df, class)
# Use map_chr() to extract first element in class_list
map_chr(class_list, 1)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()## Named chr [1:4] "integer" "numeric" "POSIXct" "ordered"
## - attr(*, "names")= chr [1:4] "a" "b" "y" "z"
## Named chr [1:2] "POSIXct" "ordered"
## - attr(*, "names")= chr [1:2] "y" "z"
## Named chr [1:2] "integer" "numeric"
## - attr(*, "names")= chr [1:2] "a" "b"
## or make our own error message
col_classes <- function(df) {
class_list <- map(df, class)
# Add a check that no element of class_list has length > 1
if (any(map_dbl(class_list, length) > 1)) {
stop("Some columns have more than one class", call. = FALSE)
}
# Use flatten_chr() to return a character vector
flatten_chr(class_list)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()## Error: Some columns have more than one class
## Error: Some columns have more than one class
## chr [1:2] "integer" "numeric"
big_x <- function(df, threshold) {
dplyr::filter(df, x > threshold)
}
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 2 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 1.35 Ideal G VS1 60.9 54 10471 7.18 7.15 4.36
## 2 1.63 Ideal F I1 62 55 7229 7.57 7.5 4.68
# Remove the x column from diamonds
diamonds_sub$x <- NULL
# Create variable x with value 1
x <- 1
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 0 x 9
## # ... with 9 variables: carat <dbl>, cut <ord>, color <ord>, clarity <ord>,
## # depth <dbl>, table <dbl>, price <int>, y <dbl>, z <dbl>
# Create a threshold column with value 100
diamonds_sub$threshold <- 100
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, 7)## # A tibble: 0 x 10
## # ... with 10 variables: carat <dbl>, cut <ord>, color <ord>, clarity <ord>,
## # depth <dbl>, table <dbl>, price <int>, y <dbl>, z <dbl>, threshold <dbl>
big_x <- function(df, threshold) {
# Write a check for x not being in df
if (!'x' %in% names(df)) {
stop("df must contain variable called x", call. = FALSE)
}
# Write a check for threshold being in df
if ('threshold' %in% names(df)) {
stop("df must not contain variable called threshold", call. = FALSE)
}
dplyr::filter(df, x > threshold)
}
big_x(diamonds_sub, 7)## Error: df must contain variable called x
# Fit a regression model
fit <- lm(mpg ~ wt, data = mtcars)
# Look at the summary of the model
summary(fit)##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# Set the global digits option to 2
options(digits = 2)
# Take another look at the summary
summary(fit)##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.543 -2.365 -0.125 1.410 6.873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.285 1.878 19.86 < 2e-16 ***
## wt -5.344 0.559 -9.56 1.3e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3 on 30 degrees of freedom
## Multiple R-squared: 0.753, Adjusted R-squared: 0.745
## F-statistic: 91.4 on 1 and 30 DF, p-value: 1.29e-10