Ch20 Vectors
Introduction
Vector basics
# vector basics
# typeof() reports the storage type of a vector
typeof(letters)
## [1] "character"
typeof(1:10)
## [1] "integer"
# a list can hold elements of different types; length() counts only the
# top-level elements, so the 10-element integer vector counts as one
x <- list("a", "b", 1:10)
length(x)
## [1] 3
Important types of atomic vector
# logical
c(TRUE, TRUE, FALSE, NA)
## [1] TRUE TRUE FALSE NA
# numeric: a bare number is stored as double; the L suffix makes it an integer
typeof(1)
## [1] "double"
typeof(1L)
## [1] "integer"
# special values: dividing by zero yields -Inf, NaN (0 / 0) or Inf rather
# than an error
c(-1, 0, 1) / 0
## [1] -Inf NaN Inf
# character
typeof("hello")
## [1] "character"
# missing values: NA_integer_, NA_real_ and NA_character_ all print as NA,
# just like the plain NA
NA
## [1] NA
NA_integer_
## [1] NA
NA_real_
## [1] NA
NA_character_
## [1] NA
Using atomic vectors
# a length-1 value is applied to every element of the longer vector
sample(10) + 10
## [1] 18 13 17 15 11 20 16 14 19 12
# recycling: the shorter vector is repeated to the length of the longer one
1:10 + 1:2
## [1] 2 4 4 6 6 8 8 10 10 12
# recycling still happens, with a warning, when the longer length is not a
# multiple of the shorter one
1:10 + 1:3
## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
## length
## [1] 2 4 6 5 7 9 8 10 12 11
# data.frame() recycles b (length 2) across the 10 rows of a
data.frame(a = 1:10, b = 1:2)
## a b
## 1 1 1
## 2 2 2
## 3 3 1
## 4 4 2
## 5 5 1
## 6 6 2
## 7 7 1
## 8 8 2
## 9 9 1
## 10 10 2
# NOTE(review): left commented out, presumably because data.frame() cannot
# recycle length 3 evenly into 10 rows - confirm
# data.frame(a = 1:10, b = 1:3)
# no seed is set in this section, so the sampled values differ between runs
x <- sample(10)
x
## [1] 3 6 2 9 5 4 7 10 8 1
# subset by position: the 5th and 7th elements
x[c(5, 7)]
## [1] 5 7
# subset by logical condition: keep the elements greater than 5
x[x>5]
## [1] 6 9 7 10 8
Recursive vectors
# a named list whose last element is itself a list
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
a
## $a
## [1] 1 2 3
##
## $b
## [1] "a string"
##
## $c
## [1] 3.141593
##
## $d
## $d[[1]]
## [1] -1
##
## $d[[2]]
## [1] -5
# `[` returns a smaller list that keeps the element names
a[1:2]
## $a
## [1] 1 2 3
##
## $b
## [1] "a string"
# `[[` extracts a single element; here that element is the nested list d
a[[4]]
## [[1]]
## [1] -1
##
## [[2]]
## [1] -5
# `[` on the nested list still returns a list (of length 1)
a[[4]][2]
## [[1]]
## [1] -5
# a second `[[` reaches the value itself
a[[4]][[2]]
## [1] -5
Attributes
x <- 1:10
# an attribute that has not been set reads as NULL
attr(x, "greeting")
## NULL
# attach two pieces of named metadata to the vector
attr(x, "greeting") <- "Hi!"
attr(x, "farewell") <- "Bye!"
# attributes() returns everything that was attached, as a named list
attributes(x)
## $greeting
## [1] "Hi!"
##
## $farewell
## [1] "Bye!"
Augmented Vectors
# factor: stored as integer, with the levels and the class kept as attributes;
# "ef" is a declared level even though no element uses it
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)
## [1] "integer"
attributes(x)
## $levels
## [1] "ab" "cd" "ef"
##
## $class
## [1] "factor"
# Date: a double with class "Date"; 1971-01-01 unclasses to 365, i.e. the
# number of days since 1970-01-01
x <- as.Date("1971-01-01")
unclass(x)
## [1] 365
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
# date-time (POSIXct): a double with a tzone attribute; 01:00 UTC on
# 1970-01-01 unclasses to 3600, i.e. seconds since 1970-01-01 00:00 UTC
x <- lubridate::ymd_hm("1970-01-01 01:00")
unclass(x)
## [1] 3600
## attr(,"tzone")
## [1] "UTC"
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "UTC"
# only the tzone attribute is reassigned below; the printed clock time
# changes with it
attr(x, "tzone") <- "US/Pacific"
x
## [1] "1969-12-31 17:00:00 PST"
attr(x, "tzone") <- "US/Eastern"
x
## [1] "1969-12-31 20:00:00 EST"
Ch21 Iteration
Introduction
For loops
# example from the cheatsheet: print each of 1..4 shifted up by 10
for (i in seq_len(4)) {
  j <- i + 10
  print(j)
}
## [1] 11
## [1] 12
## [1] 13
## [1] 14
# example 1: numeric calculation - add 10
x <- 11:15
for (i in seq_along(x)) {
  # j holds the shifted value for the current element only
  j <- x[[i]] + 10
  print(j)
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# save output
# Preallocate as double: 10 is a double literal, so x[i] + 10 is a double and
# an integer vector would be coerced to double on the first assignment.
y <- vector("double", length(x))
for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# example 2: string operation - extract first letter
x <- c("abc", "xyz")
y <- character(length(x))
for (i in seq_along(x)) {
  # keep only the first match of the pattern "[a-z]" in each string
  y[i] <- str_extract(x[i], "[a-z]")
  print(y[i])
}
## [1] "a"
## [1] "x"
# output
y
## [1] "a" "x"
For loop variations
# modifying an existing object
# four columns of 10 random normal draws each; no seed is set here, so the
# values differ between runs
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Rescale a numeric vector linearly so that its smallest finite value maps to
# 0 and its largest finite value maps to 1.
#
# x: a numeric vector; NA entries stay NA in the result.
# Returns a numeric vector the same length as x.
#
# finite = TRUE drops Inf / -Inf (and NA) when computing the range, so one
# infinite value no longer distorts the scaling of the finite elements;
# infinite inputs come back as -Inf / Inf.
# A vector with no spread (rng[2] == rng[1]) still yields NaN from 0 / 0.
rescale01 <- function(x) {
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
# overwrite each column of df in place with its rescaled version
for (i in seq_along(df)) {
  df[[i]] <- rescale01(df[[i]])
}
df
## # A tibble: 10 × 4
## a b c d
## <dbl> <dbl> <dbl> <dbl>
## 1 0.350 0.212 0 1
## 2 0 0.592 0.127 0
## 3 0.867 0.265 0.194 0.0185
## 4 0.916 0.969 1 0.592
## 5 0.281 0 0.341 0.711
## 6 0.785 0.875 0.888 0.270
## 7 1 0.501 0.919 0.485
## 8 0.776 1 0.593 0.785
## 9 0.0779 0.697 0.324 0.535
## 10 0.545 0.235 0.649 0.332
For loops vs functionals
# fresh data for the comparison: four columns of 10 random normal draws each
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# for loop
# one mean per column, written into a preallocated double vector
output <- numeric(length(df))
for (i in seq_along(df)) {
  output[[i]] <- mean(df[[i]])
}
output
## [1] 0.049788937 -0.008714384 -0.342289224 -0.142325754
# functional approach
# Apply a summary function to every column of a data frame.
#
# df:  a data frame (or list) whose elements are passed to fun one at a time.
# fun: a function called on each column; its result is stored in one slot.
# Returns a vector with one entry per column, in column order.
col_summary <- function(df, fun) {
  n_cols <- length(df)
  results <- numeric(n_cols)
  for (k in seq_len(n_cols)) {
    results[k] <- fun(df[[k]])
  }
  results
}
# the summary function is an argument, so the same helper gives means ...
col_summary(df, mean)
## [1] 0.049788937 -0.008714384 -0.342289224 -0.142325754
# ... or medians without rewriting the loop
col_summary(df, median)
## [1] 0.079361537 -0.234297005 -0.626088530 -0.004532156
The map functions
# example 1: numeric calculation - add 10
x <- 11:15
# Preallocate as double: 10 is a double literal, so x[i] + 10 is a double and
# an integer vector would be coerced to double on the first assignment.
y <- vector("double", length(x))
for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# using map function
x
## [1] 11 12 13 14 15
# map() applies the formula to each element and returns a list
map(.x = x, .f = ~.x + 10)
## [[1]]
## [1] 21
##
## [[2]]
## [1] 22
##
## [[3]]
## [1] 23
##
## [[4]]
## [1] 24
##
## [[5]]
## [1] 25
# map_dbl() does the same work but returns a plain numeric vector
map_dbl(.x = x, .f = ~.x + 10)
## [1] 21 22 23 24 25
# Add 10 to every element of x and return the result.
add_10 <- function(x) {
  x + 10
}
# a named function can be piped into ...
11 %>% add_10()
## [1] 21
# ... or handed to map_dbl() in place of a formula
map_dbl(.x = x, .f = add_10)
## [1] 21 22 23 24 25
Dealing with failure
# safely()
# the wrapped function returns a list with `result` and `error` elements
# instead of stopping when log() fails
safe_log <- safely(log)
# success: result is filled in, error is NULL
str(safe_log(10))
## List of 2
## $ result: num 2.3
## $ error : NULL
# failure: result is NULL and error holds the captured condition
str(safe_log("a"))
## List of 2
## $ result: NULL
## $ error :List of 2
## ..$ message: chr "non-numeric argument to mathematical function"
## ..$ call : language .f(...)
## ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# using safely() with map
# NOTE(review): transpose() presumably regroups the per-element result/error
# pairs into one list of results and one list of errors - confirm; y is
# assigned but not inspected in this section
x <- list(1, 10, "a")
y <- x %>% map(safely(log)) %>% transpose()
# possibly() - returns default value on error
# here the failing element "a" comes back as NA
x %>% map_dbl(possibly(log, NA_real_))
## [1] 0.000000 2.302585 NA