2 types of vectors:
Atomic vectors: 6 types -> logical, integer, double, character, complex, raw. Integer and double vectors are known as numeric vectors. They are homogeneous.
Lists: sometimes called recursive vectors because lists can contain other lists. They are heterogeneous.
# 2 key properties:
# Type:
typeof(letters)
## [1] "character"
#> [1] "character"
typeof(1:10)
## [1] "integer"
#> [1] "integer"
#Length:
x <- list("a", "b", 1:10)
length(x)
## [1] 3
#> [1] 3
Augmented vectors are vectors (atomic vectors or lists) that carry additional attributes, such as a class.
Logical vectors are the simplest type of atomic vector because they can take only three possible values: FALSE, TRUE, and NA.
1:10 %% 3 == 0
## [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
#> [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
c(TRUE, TRUE, FALSE, NA)
## [1] TRUE TRUE FALSE NA
#> [1] TRUE TRUE FALSE NA
Integer and double vectors are known collectively as numeric vectors.
typeof(1)
## [1] "double"
#> [1] "double"
typeof(1L)
## [1] "integer"
#> [1] "integer"
1.5L
## [1] 1.5
#> [1] 1.5
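2 important differences between integers and doubles: (1) doubles are approximations, so results such as sqrt(2) ^ 2 are not exactly 2; (2) integers have one special value (NA), while doubles have four (NA, NaN, Inf and -Inf):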
x <- sqrt(2) ^ 2
x
## [1] 2
#> [1] 2
x - 2
## [1] 4.440892e-16
#> [1] 4.440892e-16
c(-1, 0, 1) / 0
## [1] -Inf NaN Inf
#> [1] -Inf NaN Inf
Avoid using == to check for these other special values. Instead use the helper functions is.finite(), is.infinite(), and is.nan():
Character vectors are the most complex type of atomic vector, because each element of a character vector is a string, and a string can contain an arbitrary amount of data.
x <- "This is a reasonably long string."
#pryr::object_size(x)
#> 152 B
y <- rep(x, 1000)
#pryr::object_size(y)
#> 8.14 kB
NA # logical
## [1] NA
#> [1] NA
NA_integer_ # integer
## [1] NA
#> [1] NA
NA_real_ # double
## [1] NA
#> [1] NA
NA_character_ # character
## [1] NA
#> [1] NA
sample(10) + 10
## [1] 14 11 16 18 17 15 20 13 19 12
1:10 + 1:2
## [1] 2 4 4 6 6 8 8 10 10 12
#1:10 + 1:3
data.frame(a = 1:10, b = 1:2)
## a b
## 1 1 1
## 2 2 2
## 3 3 1
## 4 4 2
## 5 5 1
## 6 6 2
## 7 7 1
## 8 8 2
## 9 9 1
## 10 10 2
#data.frame(a = 1:10, b = 1:3)
x <- sample(10)
x
## [1] 6 8 9 3 10 5 1 4 2 7
x[c(5, 7)]
## [1] 10 1
x[x>5]
## [1] 6 8 9 10 7
Visualizing lists
x1 <- list(c(1, 2), c(3, 4))
# (1-2 3-4)
x2 <- list(list(1, 2), list(3, 4))
# ( (1-2) (3-4) )
x3 <- list(1, list(2, list(3)))
# Here 1 sits directly ("alone") in the outer list, while 2 and 3 are each wrapped in a further nested list: (1 (2 (3)))
Subsetting
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
a
## $a
## [1] 1 2 3
##
## $b
## [1] "a string"
##
## $c
## [1] 3.141593
##
## $d
## $d[[1]]
## [1] -1
##
## $d[[2]]
## [1] -5
a[1:2]
## $a
## [1] 1 2 3
##
## $b
## [1] "a string"
# then: a = (1-2-3 "a string" 3.141593 (-1 -5))
str(a[1:2])
## List of 2
## $ a: int [1:3] 1 2 3
## $ b: chr "a string"
# a[1:2] = (1-2-3 "a string")
str(a[4])
## List of 1
## $ d:List of 2
## ..$ : num -1
## ..$ : num -5
# a[4] = ( (-1 -5))
a[[4]]
## [[1]]
## [1] -1
##
## [[2]]
## [1] -5
# a[[4]] = (-1 -5)
# double brackets strip the outer list level, so only the inner list () remains
a[[4]][2]
## [[1]]
## [1] -5
# a[[4]][2] = (-5)
# Still a list (single brackets keep the list), holding only the second element
a[[4]][[2]]
## [1] -5
# a[[4]][[2]] = -5
You can add class (like factors, dates, date-times, and tibbles) to atomic vectors => augmented vectors
Functions and iteration help reduce duplication.
# example from the cheatsheet
# Print i + 10 for i = 1, 2, 3, 4; `j` holds the most recent sum.
for (i in seq_len(4)) {
  j <- i + 10
  print(j)
}
## [1] 11
## [1] 12
## [1] 13
## [1] 14
# example 1: numeric calculation - add 10
# Input values for the loop below.
x <- 11:15
# Walk the positions of `x`, printing each element plus 10.
for (i in seq_along(x)) {
  j <- x[i] + 10
  print(j)
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# save output
# Preallocate an integer result vector with one slot per element of `x`.
y <- vector("integer", length(x))
for (i in seq_along(x)) {
  # `10L` keeps the sum an integer so it matches the preallocated type;
  # a bare `10` is a double and would silently coerce all of `y` to double.
  y[i] <- x[i] + 10L
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# example 2: string operation - extract first letter
x2 <- c("abc", "xyz")
# Size the result by `x2` (the vector being looped over), not by the
# unrelated `x`, so `y2` has no leftover empty strings.
y2 <- vector("character", length(x2))
for (i in seq_along(x2)) {
  # Keep the first lowercase letter matched in each string.
  y2[i] <- x2[i] %>% str_extract("[a-z]")
  print(y2[i])
}
## [1] "a"
## [1] "x"
Example from book
# Four columns, each holding 10 draws from rnorm().
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# instead of using copy-paste:
# 1. output: one double slot per column of `df`
output <- numeric(ncol(df))
# 2. sequence: iterate over the column positions
for (i in seq_along(df)) {
  # 3. body: store the median of the i-th column
  output[[i]] <- median(df[[i]])
}
output
## [1] -0.03177712 0.39590967 -0.50827137 -0.78145830
#> [1] -0.24576245 -0.28730721 -0.05669771 0.14426335
# example 1: numeric calculation - add 10
x <- 11:15
# Preallocate an integer result vector with one slot per element of `x`.
y <- vector("integer", length(x))
for (i in seq_along(x)) {
  # `10L` keeps the sum an integer so it matches the preallocated type;
  # a bare `10` is a double and would silently coerce all of `y` to double.
  y[i] <- x[i] + 10L
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# using map function
x
## [1] 11 12 13 14 15
map(.x = x, .f = ~.x + 10)
## [[1]]
## [1] 21
##
## [[2]]
## [1] 22
##
## [[3]]
## [1] 23
##
## [[4]]
## [1] 24
##
## [[5]]
## [1] 25
map_dbl(.x = x, .f = ~.x + 10)
## [1] 21 22 23 24 25
# Add 10 to every element of `x`; the result has the same length as `x`.
add_10 <- function(x) {
  x + 10
}
11 %>% add_10()
## [1] 21
map_dbl(.x = x, .f = add_10)
## [1] 21 22 23 24 25
# same result as map_dbl(.x = x, .f = ~.x + 10) above