Ch. 20
Intro
Vector basics
# typeof() reports a vector's type; the built-in `letters` is a character vector.
typeof(letters)
## [1] "character"
#> [1] "character"
# A sequence built with `:` has type "integer".
typeof(1:10)
## [1] "integer"
#> [1] "integer"
# length() of a list counts its top-level elements, so the 1:10 vector counts once.
x <- list("a", "b", 1:10)
length(x)
## [1] 3
#> [1] 3
Important types of atomic vectors
# Comparisons are vectorised: one logical per element, TRUE where 3 divides it.
1:10 %% 3 == 0
## [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
#> [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
# A logical vector can hold TRUE, FALSE and NA.
c(TRUE, TRUE, FALSE, NA)
## [1] TRUE TRUE FALSE NA
#> [1] TRUE TRUE FALSE NA
# A bare numeric literal is a double ...
typeof(1)
## [1] "double"
#> [1] "double"
# ... while the L suffix makes it an integer.
typeof(1L)
## [1] "integer"
#> [1] "integer"
# With a fractional literal the L suffix has no effect: the value printed is
# still 1.5 rather than an integer.
1.5L
## [1] 1.5
#> [1] 1.5
# Doubles are approximations: squaring sqrt(2) prints as 2 ...
x <- sqrt(2) ^ 2
x
## [1] 2
#> [1] 2
# ... but subtracting 2 exposes a tiny non-zero remainder.
x - 2
## [1] 4.440892e-16
#> [1] 4.440892e-16
# Dividing by zero yields the special double values -Inf, NaN and Inf.
c(-1, 0, 1) / 0
## [1] -Inf NaN Inf
#> [1] -Inf NaN Inf
# Each atomic type has its own missing-value constant; all of them print as NA.
NA # logical
## [1] NA
#> [1] NA
NA_integer_ # integer
## [1] NA
#> [1] NA
NA_real_ # double
## [1] NA
#> [1] NA
NA_character_ # character
## [1] NA
#> [1] NA
Using atomic vectors
# Draw 100 values from 1..20 with replacement. No seed is set in this excerpt,
# so the results vary between runs; the two recorded outputs below disagree
# for that reason.
x <- sample(20, 100, replace = TRUE)
y <- x > 10
# Summing a logical vector counts its TRUE elements.
sum(y) # how many are greater than 10?
## [1] 42
#> [1] 38
# The mean of a logical vector is the proportion of TRUE elements.
mean(y) # what proportion are greater than 10?
## [1] 0.42
#> [1] 0.38
# Run the body only when x has at least one element. The length is compared
# explicitly instead of relying on the implicit coercion of a number to a
# logical condition (0 -> FALSE, anything else -> TRUE), which hides the intent.
if (length(x) > 0) {
  # do something
}
## NULL
# c() coerces mixed inputs to a single common type:
# logical combined with integer gives integer,
typeof(c(TRUE, 1L))
## [1] "integer"
#> [1] "integer"
# integer combined with double gives double,
typeof(c(1L, 1.5))
## [1] "double"
#> [1] "double"
# and a number combined with a string gives character.
typeof(c(1.5, "a"))
## [1] "character"
#> [1] "character"
# A length-1 value is recycled across the whole vector. sample() and runif()
# are unseeded here, so the two recorded outputs differ.
sample(10) + 100
## [1] 106 102 108 107 110 101 105 109 103 104
#> [1] 107 104 103 109 102 101 106 110 105 108
runif(10) > 0.5
## [1] TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
#> [1] FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
# The shorter vector 1:2 is repeated to match the length of 1:10.
1:10 + 1:2
## [1] 2 4 4 6 6 8 8 10 10 12
#> [1] 2 4 4 6 6 8 8 10 10 12
# 10 is not a multiple of 3: the shorter vector is still recycled, but with a warning.
1:10 + 1:3
## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
## length
## [1] 2 4 6 5 7 9 8 10 12 11
#> Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
#> length
#> [1] 2 4 6 5 7 9 8 10 12 11
# Elements can be named when the vector is created with c() ...
c(x = 1, y = 2, z = 4)
## x y z
## 1 2 4
#> x y z
#> 1 2 4
# ... or afterwards with set_names() (not base R; presumably purrr::set_names,
# attached outside this excerpt).
set_names(1:3, c("a", "b", "c"))
## a b c
## 1 2 3
#> a b c
#> 1 2 3
# Subsetting with a character vector selects elements by name, in the order requested.
x <- c(abc = 1, def = 2, xyz = 5)
x[c("xyz", "def")]
## xyz def
## 5 2
#> xyz def
#> 5 2
Recursive vectors
# Build a plain list before inspecting it. Without this assignment `x` is still
# the named numeric vector left over from the previous section, so str() would
# describe that vector instead of a list.
x <- list(1, 2, 3)
# str() summarises the structure of the list: three unnamed numeric elements.
str(x)
## List of 3
## $ : num 1
## $ : num 2
## $ : num 3
#> List of 3
#> $ : num 1
#> $ : num 2
#> $ : num 3
# List elements can be named; str() then shows each name after the `$`.
x_named <- list(a = 1, b = 2, c = 3)
str(x_named)
## List of 3
## $ a: num 1
## $ b: num 2
## $ c: num 3
#> List of 3
#> $ a: num 1
#> $ b: num 2
#> $ c: num 3
# Unlike an atomic vector, a list can mix element types.
y <- list("a", 1L, 1.5, TRUE)
str(y)
## List of 4
## $ : chr "a"
## $ : int 1
## $ : num 1.5
## $ : logi TRUE
#> List of 4
#> $ : chr "a"
#> $ : int 1
#> $ : num 1.5
#> $ : logi TRUE
# Lists can contain vectors and other lists: x1 holds two numeric vectors,
# x2 holds two lists of two numbers, and x3 nests lists three levels deep.
x1 <- list(c(1, 2), c(3, 4))
x2 <- list(list(1, 2), list(3, 4))
x3 <- list(1, list(2, list(3)))
# A named list combining an integer vector, a string, a double and a nested list.
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
Attributes
# attr() reads a single attribute and returns NULL when it has not been set.
x <- 1:10
attr(x, "greeting")
## NULL
#> NULL
# Assigning through attr() attaches metadata to the vector.
attr(x, "greeting") <- "Hi!"
attr(x, "farewell") <- "Bye!"
# attributes() returns every attribute at once as a named list.
attributes(x)
## $greeting
## [1] "Hi!"
##
## $farewell
## [1] "Bye!"
#> $greeting
#> [1] "Hi!"
#>
#> $farewell
#> [1] "Bye!"
Augmented vectors
# A factor is stored as an integer vector carrying "levels" and "class"
# attributes; the levels may include a value ("ef") that never occurs in the data.
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)
## [1] "integer"
#> [1] "integer"
attributes(x)
## $levels
## [1] "ab" "cd" "ef"
##
## $class
## [1] "factor"
#> $levels
#> [1] "ab" "cd" "ef"
#>
#> $class
#> [1] "factor"
# A tibble is stored as a list carrying class, row.names and names attributes.
tb <- tibble::tibble(x = 1:5, y = 5:1)
typeof(tb)
## [1] "list"
#> [1] "list"
attributes(tb)
## $class
## [1] "tbl_df" "tbl" "data.frame"
##
## $row.names
## [1] 1 2 3 4 5
##
## $names
## [1] "x" "y"
#> $class
#> [1] "tbl_df" "tbl" "data.frame"
#>
#> $row.names
#> [1] 1 2 3 4 5
#>
#> $names
#> [1] "x" "y"
Ch. 21
For Loops
# Four columns of ten standard-normal draws each. The draws are unseeded, so
# the medians recorded below change from run to run.
df <- tibble(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
# Column medians: allocate the result up front, then fill one slot per column.
output <- double(ncol(df))
for (j in seq_along(df)) {
  output[[j]] <- median(df[[j]])
}
output
## [1] -0.6256641 -0.6144015 -0.3892650 -0.3625807
#> [1] -0.24576245 -0.28730721 -0.05669771 0.14426335
# Fresh data for the rescaling example: four columns of ten rnorm() draws each.
# No seed is set in this excerpt, so the values differ on every run.
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
# Linearly rescale a numeric vector so its smallest non-missing value maps to 0
# and its largest to 1.
#
# x: a numeric vector; NA entries are ignored when finding the bounds and stay
#    NA in the result.
# Returns a vector the same length as x. When all values are equal the span is
# zero, so the result is NaN.
rescale01 <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  lower <- bounds[1]
  span <- bounds[2] - lower
  (x - lower) / span
}
# Rescale each of the four columns in place, visiting them by name.
for (nm in c("a", "b", "c", "d")) {
  df[[nm]] <- rescale01(df[[nm]])
}
# Simulate a single coin toss: returns one of "T" or "H", picked at random.
flip <- function() {
  sides <- c("T", "H")
  sample(sides, 1)
}
# Count how many tosses it takes to get three heads in a row. The number of
# iterations is not known in advance, hence a while loop.
flips <- 0
nheads <- 0
while (nheads < 3) {
  # A head extends the current streak; anything else resets it to zero.
  nheads <- if (flip() == "H") nheads + 1 else 0
  flips <- flips + 1
}
flips
## [1] 12
#> [1] 21
For loops vs functionals
# Four columns of ten standard-normal draws each (unseeded, so the recorded
# means below change from run to run).
df <- tibble(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
# Column means: allocate the result up front, then fill one slot per column.
output <- double(length(df))
for (j in seq_along(df)) {
  output[[j]] <- mean(df[[j]])
}
output
## [1] 0.162536547 0.006615033 -0.267736445 -0.166334372
#> [1] -0.3260369 0.1356639 0.4291403 -0.2498034
# Apply a summary function to every column of a data frame (or element of a
# list) using a preallocated loop.
#
# df:  a data frame or list; each element is passed to `fun` in turn.
# fun: a function expected to return a single number per element.
# Returns an unnamed vector with one entry per element of `df` (a double
# vector as long as `fun` returns numbers); empty input gives numeric(0).
col_summary <- function(df, fun) {
  output <- vector("double", length(df))
  for (i in seq_along(df)) {
    output[i] <- fun(df[[i]])
  }
  output
}

# Mean of each column of `df`, as an unnamed double vector.
col_mean <- function(df) {
  col_summary(df, mean)
}

# Median of each column of `df`, as an unnamed double vector.
col_median <- function(df) {
  col_summary(df, median)
}

# Standard deviation of each column of `df`, as an unnamed double vector.
col_sd <- function(df) {
  col_summary(df, sd)
}
The map functions
# map_dbl() applies a function to each column of df and returns a named double
# vector. The ## values match the loop-based column means recorded earlier,
# now labelled with the column names.
# (map_dbl is not base R; presumably purrr, attached outside this excerpt.)
map_dbl(df, mean)
## a b c d
## 0.162536547 0.006615033 -0.267736445 -0.166334372
#> a b c d
#> -0.3260369 0.1356639 0.4291403 -0.2498034
map_dbl(df, median)
## a b c d
## -0.10213680 0.29603751 -0.18454087 -0.01080874
#> a b c d
#> -0.51850298 0.02779864 0.17295591 -0.61163819
map_dbl(df, sd)
## a b c d
## 1.3869881 1.2709210 0.8418680 0.6646794
#> a b c d
#> 0.9214834 0.4848945 0.9816016 1.1563324
# The same three summaries written with the pipe; the recorded results are
# identical to the direct calls above.
df %>% map_dbl(mean)
## a b c d
## 0.162536547 0.006615033 -0.267736445 -0.166334372
#> a b c d
#> -0.3260369 0.1356639 0.4291403 -0.2498034
df %>% map_dbl(median)
## a b c d
## -0.10213680 0.29603751 -0.18454087 -0.01080874
#> a b c d
#> -0.51850298 0.02779864 0.17295591 -0.61163819
df %>% map_dbl(sd)
## a b c d
## 1.3869881 1.2709210 0.8418680 0.6646794
#> a b c d
#> 0.9214834 0.4848945 0.9816016 1.1563324
# Fit one linear model of mpg on wt per cylinder group: split() breaks mtcars
# into pieces keyed by cyl, and map() runs lm() on each piece.
models <- mtcars %>%
split(.$cyl) %>%
map(function(df) lm(mpg ~ wt, data = df))
# The same fit using the formula shorthand, where `.` stands for the current
# piece. This assignment overwrites the `models` created just above.
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg ~ wt, data = .))
# Pull R-squared out of each model summary with a formula ...
models %>%
map(summary) %>%
map_dbl(~.$r.squared)
## 4 6 8
## 0.5086326 0.4645102 0.4229655
#> 4 6 8
#> 0.5086326 0.4645102 0.4229655
# ... or by passing the component name as a string; the recorded results are
# the same.
models %>%
map(summary) %>%
map_dbl("r.squared")
## 4 6 8
## 0.5086326 0.4645102 0.4229655
#> 4 6 8
#> 0.5086326 0.4645102 0.4229655