Chapter 20 Vectors

Introduction

Vector Basics

# 2 key properties
# type
typeof(letters)
## [1] "character"
# character
typeof(1:10)
## [1] "integer"
#integer


#length
x <- list("a", "b", 1:10)
length(x)
## [1] 3

Important types of atomic vector

Logical

1:10 %% 3 == 0
##  [1] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE
c(TRUE, TRUE, FALSE, NA)
## [1]  TRUE  TRUE FALSE    NA

Numeric

typeof(1)
## [1] "double"
typeof(1L)
## [1] "integer"
1.5L
## [1] 1.5

2 important differences between integers and doubles:

  • Doubles are approximations. For example, what is the square of the square root of two?
x <- sqrt(2) ^ 2
x
## [1] 2
x-2
## [1] 4.440892e-16
  • Integers have one special value, NA, while doubles have four: NA, NaN, Inf, and -Inf.
c(-1, 0, 1)/0
## [1] -Inf  NaN  Inf

Character

Character vectors are the most complex type of vector because each element of a character vector is a string, and a string can contain an arbitrary amount of data.

x <- "This is a reasonably long string."
# pryr::object_size(x)
#> 152 B
y <- rep(x, 1000)
# pryr::object_size(y)
#> 8.14 kB

Missing Values

NA # logical
## [1] NA
NA_integer_ #integer
## [1] NA
NA_real_ # double
## [1] NA
NA_character_ # character
## [1] NA

Using atomic vectors

sample(10) + 10
##  [1] 17 20 14 13 16 15 19 18 12 11
1:10 + 1:2
##  [1]  2  4  4  6  6  8  8 10 10 12
# 1:10 + 1:3
data.frame(a = 1:10, b = 1:2)
##     a b
## 1   1 1
## 2   2 2
## 3   3 1
## 4   4 2
## 5   5 1
## 6   6 2
## 7   7 1
## 8   8 2
## 9   9 1
## 10 10 2
# data.frame(a = 1:10, b = 1:3)
x <- sample(10)
x
##  [1]  7  6  2  8 10  3  5  4  1  9
x[c(5:7)]
## [1] 10  3  5
# elements 5 through 7 of the sample
x[x>5]
## [1]  7  6  8 10  9

Recursive vectors (lists)

Visualizing lists

# Three lists holding numbers at different nesting depths.
# x1: a list of two numeric vectors, each of length 2.
x1 <- list(c(1, 2), c(3, 4))
# x2: a list of two lists, each holding two length-1 numeric vectors.
x2 <- list(list(1, 2), list(3, 4))
# x3: a number followed by a list that itself contains a number and a list.
x3 <- list(1, list(2, list(3)))

Subsetting

a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
a
## $a
## [1] 1 2 3
## 
## $b
## [1] "a string"
## 
## $c
## [1] 3.141593
## 
## $d
## $d[[1]]
## [1] -1
## 
## $d[[2]]
## [1] -5
a[1:2]
## $a
## [1] 1 2 3
## 
## $b
## [1] "a string"
str(a[1:2])
## List of 2
##  $ a: int [1:3] 1 2 3
##  $ b: chr "a string"
#> List of 2
#>  $ a: int [1:3] 1 2 3
#>  $ b: chr "a string"
str(a[4])
## List of 1
##  $ d:List of 2
##   ..$ : num -1
##   ..$ : num -5
#> List of 1
#>  $ d:List of 2
#>   ..$ : num -1
#>   ..$ : num -5
a[[4]]
## [[1]]
## [1] -1
## 
## [[2]]
## [1] -5
a[[4]][2]
## [[1]]
## [1] -5
a[[4]][[2]]
## [1] -5

Attributes

Augmented vectors

Adding a class attribute to an atomic vector or a list produces an augmented vector; factors, dates, date-times, and tibbles are examples.

Chapter 21 Iteration

Introduction

Iteration helps reduce duplication in your code.

For loops

# example from cheatsheet

# Step through 1 to 4, shift each value by ten, and echo the result.
for (i in seq_len(4)) {
  j <- 10 + i
  print(j)
}
## [1] 11
## [1] 12
## [1] 13
## [1] 14
# Example 1: numeric calculation -- add 10 to each element and print it.
x <- 11:15

# Iterate by position so the loop is also safe for a zero-length `x`.
for (i in seq_along(x)) {
  j <- 10 + x[[i]]
  print(j)
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# Save the output: preallocate the result vector, then fill it by position.
# `x[i] + 10` is a double (the literal 10 is a double), so the result vector
# is allocated as "double". An "integer" vector would be silently coerced to
# double on the first assignment.
y <- vector("double", length(x))

for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# Example 2: string operation -- extract the first lowercase letter.
x2 <- c("abc", "xyz")

# One empty string per input element, filled in by the loop below.
y2 <- character(length(x2))

for (i in seq_along(x2)) {
  y2[i] <- str_extract(x2[i], "[a-z]")
  print(y2[i])
}
## [1] "a"
## [1] "x"

Example from book

# Four columns, each holding ten draws from rnorm().
df <- tibble(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))

output <- double(ncol(df))           # 1. output: one slot per column
for (i in seq_len(ncol(df))) {       # 2. sequence: column positions
  output[[i]] <- median(df[[i]])     # 3. body: median of column i
}
output
## [1] -0.33245027  0.19882249 -0.02766785  0.06046159
#> [1] -0.24576245 -0.28730721 -0.05669771  0.14426335

The map functions

# Example 1: numeric calculation -- add 10 to each element.
x <- 11:15

# `x[i] + 10` is a double (the literal 10 is a double), so preallocate a
# double vector. An "integer" vector would be silently coerced to double on
# the first assignment.
y <- vector("double", length(x))

for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
#output
y
## [1] 21 22 23 24 25
# using map function
x
## [1] 11 12 13 14 15
map(.x = x, .f = ~.x + 10)
## [[1]]
## [1] 21
## 
## [[2]]
## [1] 22
## 
## [[3]]
## [1] 23
## 
## [[4]]
## [1] 24
## 
## [[5]]
## [1] 25
map_dbl(.x = x, .f = ~.x + 10)
## [1] 21 22 23 24 25
# Return `x` increased by ten; works element-wise on a vector.
add_10 <- function(x) {
  x + 10
}
11 %>% add_10()
## [1] 21
map_dbl(.x = x, .f = add_10)
## [1] 21 22 23 24 25

Dealing with failure

Mapping over multiple arguments