# Goal: extract the slope estimate for the `wt` variable from a regression
# model that is stored somewhere inside `tricky_list`.
# NOTE(review): `tricky_list` is not created in this part of the file; it must
# already exist in the workspace before these lines are run.
# Step 1: list the top-level element names to find where the model is stored
names(tricky_list)
# Step 2: inspect the "model" element -- names() lists its components,
# str() shows the structure of each one
names(tricky_list[["model"]])
str(tricky_list[["model"]])
# Step 3: pull out the "coefficients" component of the model
tricky_list[["model"]][["coefficients"]]
# Step 4: pull the single "wt" entry out of the coefficients
tricky_list[["model"]][["coefficients"]][["wt"]]
# Print every element of a list with an index-based loop.
# seq_along() is used instead of 1:length(a) so that the loop body is skipped
# (rather than run for i = 1 and then i = 0) if the list is ever empty.
a <- list(2, 3, 5, 7, 11, 13)
for (i in seq_along(a)) {
  # [[ ]] extracts the element itself rather than a one-element list
  print(a[[i]])
}
## [1] 2
## [1] 3
## [1] 5
## [1] 7
## [1] 11
## [1] 13
# Build a demo data frame with four columns of 10 random normal draws each.
# No seed is set, so the values differ on every run.
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Auto-print the whole data frame
df
## a b c d
## 1 1.27207119 0.2330034 0.16066405 0.3086231
## 2 -0.67788805 -1.3324355 -0.77956676 -0.9367044
## 3 -1.57230799 -0.4823942 -0.03195792 1.5708032
## 4 -0.92337911 0.8126277 0.51724567 0.7787041
## 5 -1.00007431 -0.1927306 -0.65482097 -0.6214809
## 6 0.57200743 -1.3304990 -0.96793095 -0.7076832
## 7 -0.88153966 1.1016151 -0.15646724 -0.7871502
## 8 -0.54637169 1.0693052 1.87900356 0.1654758
## 9 0.19904137 0.7091375 -0.31666585 -0.1352767
## 10 0.01994523 -0.2265446 1.92180653 -1.3774047
# Single-bracket indexing keeps the data-frame structure: this prints a
# one-column data frame holding the first column of df.
df[1L]
## a
## 1 1.27207119
## 2 -0.67788805
## 3 -1.57230799
## 4 -0.92337911
## 5 -1.00007431
## 6 0.57200743
## 7 -0.88153966
## 8 -0.54637169
## 9 0.19904137
## 10 0.01994523
# Print the median of every column of df.
# seq_along(df) iterates over the columns and, unlike 1:ncol(df), yields an
# empty sequence (instead of c(1, 0)) when df has no columns.
for (i in seq_along(df)) {
  # [[ ]] pulls the column out as a plain vector for median()
  print(median(df[[i]]))
}
## [1] -0.6121299
## [1] 0.02013642
## [1] -0.09421258
## [1] -0.3783788
### Store the column medians in a preallocated vector instead of printing them
# One double slot per column of df
output <- numeric(ncol(df))
# Fill slot i with the median of column i
for (i in seq_along(df)) {
  output[[i]] <- median(df[[i]])
}
# Show the collected medians
output
## [1] -0.61212987 0.02013642 -0.09421258 -0.37837878
# Sample inputs: only position 3 is missing in both vectors
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)

# Count the positions at which `x` and `y` are both NA.
# Returns a single integer.
both_na <- function(x, y) {
  missing_in_both <- is.na(x) & is.na(y)
  sum(missing_in_both)
}
# Drop the last element of `x`.
# Returns NULL when `x` has zero or one element; otherwise `x` without its
# final element.
f2 <- function(x) {
  n <- length(x)
  if (n > 1) {
    x[-n]
  } else {
    NULL
  }
}
# Normal-approximation confidence interval for the mean of `x`.
#
# Args:
#   x:     numeric vector of observations.
#   level: confidence level of the interval (default 0.95).
#
# Returns:
#   A length-2 numeric vector c(lower, upper). For empty `x` a warning is
#   raised and the uninformative interval c(-Inf, Inf) is returned.
mean_ci <- function(x, level = 0.95) {
  n <- length(x)
  # Guard clause: nothing to estimate from an empty sample
  if (n == 0) {
    warning("`x` was empty", call. = FALSE)
    return(c(-Inf, Inf))
  }
  std_err <- sd(x) / sqrt(n)
  tail_prob <- (1 - level) / 2
  # Lower and upper quantiles of the standard normal, scaled by the standard
  # error and centred on the sample mean
  mean(x) + std_err * qnorm(c(tail_prob, 1 - tail_prob))
}
# Sample vector with a single missing value
x <- c(1, 2, NA, 4, 5)

# Replace every NA in `x` with `y`, write the number of NAs still present
# (followed by `y`) to stdout, and return the patched vector.
f <- function(x, y) {
  x <- replace(x, is.na(x), y)
  remaining <- sum(is.na(x))
  cat(remaining, y, "\n")
  x
}

# Demo: fill the gap with 3, then with 10 (x itself is left untouched)
f(x = x, y = 3)
## 0 3
## [1] 1 2 3 4 5
f(x = x, y = 10)
## 0 10
## [1] 1 2 10 4 5
# Ten-value sample whose first two entries are missing
z <- c(
  NA, NA, -0.10618832, 1.27298018, -1.50027365,
  -0.17863732, 0.08291387, -0.94013111, 0.17204559, -0.19802701
)
# Wrap it in a one-column data frame (column name "z") and print it
df <- data.frame(z = z)
df
## z
## 1 NA
## 2 NA
## 3 -0.10618832
## 4 1.27298018
## 5 -1.50027365
## 6 -0.17863732
## 7 0.08291387
## 8 -0.94013111
## 9 0.17204559
## 10 -0.19802701
# Replace the missing values in `x` and report how many were replaced.
#
# Args:
#   x:           vector that may contain NA values.
#   replacement: value written into every NA position of `x`.
#
# Returns:
#   `x` with its NA entries overwritten by `replacement`.
#
# The count is reported with message(), so callers can silence it with
# suppressMessages(). message() already ends its output with a newline, so
# no explicit "\n" is appended (doing so printed a stray blank line).
replace_missings <- function(x, replacement) {
  is_miss <- is.na(x)
  x[is_miss] <- replacement
  message(sum(is_miss), " missings replaced by the value ", replacement, ".")
  x
}
# Try the function on column z: fill its NAs with 0 and store the result back
df[["z"]] <- replace_missings(df[["z"]], 0)
## 2 missings replaced by the value 0.
# Print df to inspect the filled column
df
## z
## 1 0.00000000
## 2 0.00000000
## 3 -0.10618832
## 4 1.27298018
## 5 -1.50027365
## 6 -0.17863732
## 7 0.08291387
## 8 -0.94013111
## 9 0.17204559
## 10 -0.19802701
# Median of each column of a data frame (or each element of a list).
# Returns an unnamed vector with one entry per column, in column order.
col_median <- function(df) {
  n_cols <- length(df)
  medians <- vector("double", n_cols)
  for (col in seq_len(n_cols)) {
    medians[[col]] <- median(df[[col]])
  }
  medians
}
# Mean of each column of a data frame (or each element of a list).
# Returns an unnamed vector with one entry per column, in column order.
col_mean <- function(df) {
  means <- double(length(df))
  for (idx in seq_along(df)) {
    means[[idx]] <- mean(df[[idx]])
  }
  means
}
# Fresh demo data frame: four columns of 10 random normal draws each
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Auto-print it
df
## a b c d
## 1 -0.19147619 0.74405728 -0.04165540 -1.415688661
## 2 -0.07873108 0.07572426 1.94396436 0.220628709
## 3 -0.09874482 -0.68824642 0.69102664 -1.006757652
## 4 0.66262529 0.41961151 -0.36087407 -0.066990957
## 5 -0.49803126 0.02491136 -1.20721582 0.550277622
## 6 1.84255104 -0.71041416 0.28037727 -0.679044497
## 7 0.82206754 1.00412151 0.09889471 0.473156931
## 8 -1.03645754 -0.97757050 0.09874980 -0.063416672
## 9 -1.20522212 0.36128055 -1.84000666 0.345021644
## 10 0.81312718 2.63900718 1.06275792 0.005860636
# Base R approach: apply mean() to each column of df; the result prints as a
# vector named after the columns
sapply(X = df, FUN = mean)
## a b c d
## 0.10317080 0.28924826 0.07260187 -0.16369529
library(purrr)
# purrr approach: same column means, with the return type fixed to double
map_dbl(.x = df, .f = mean)
## a b c d
## 0.10317080 0.28924826 0.07260187 -0.16369529
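# The map functions differ in their return type: map() returns a list,
# while map_dbl() returns a double vector.
# Advantages of the map functions in purrr: the same call iterates over the
# columns of a data frame, the elements of a list, or the elements of a vector.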
# Data frame input: map() visits each column and returns a list of results
df <- data.frame(a = 1:10, b = 11:20)
map(.x = df, .f = mean)
## $a
## [1] 5.5
##
## $b
## [1] 15.5
# List input: map() visits each element and returns a list of results
l <- list(a = 1:10, b = 11:20)
map(.x = l, .f = mean)
## $a
## [1] 5.5
##
## $b
## [1] 15.5
# Atomic vector input: map() visits each element and returns a list of results
vec <- c(a = 1, b = 2)
map(.x = vec, .f = mean)
## $a
## [1] 1
##
## $b
## [1] 2
# NOTE(review): `planes` is not created in this part of the file; it must
# already exist in the workspace.
# Mean of each column
map_dbl(.x = planes, .f = mean)
# Mean of each column, ignoring missing values
map_dbl(.x = planes, .f = function(col) mean(col, na.rm = TRUE))
# 5th percentile of each column, ignoring missing values
map_dbl(.x = planes, .f = function(col) quantile(col, probs = 0.05, na.rm = TRUE))