Ch20 vectors

Introduction

Vector basics

# vector basics
# typeof() reports the storage type of a vector.
typeof(letters)
## [1] "character"
typeof(1:10)
## [1] "integer"
# length() of a list counts its top-level elements, so the 10-element
# integer vector 1:10 counts as a single element here.
x <- list("a", "b", 1:10)
length(x)
## [1] 3

Important types of atomic vector

# logical
# A logical vector holds TRUE, FALSE or NA.
c(TRUE, TRUE, FALSE, NA)
## [1]  TRUE  TRUE FALSE    NA
# numeric
# A bare number is stored as a double; the L suffix makes it an integer.
typeof(1)
## [1] "double"
typeof(1L)
## [1] "integer"
# special values
# Dividing by zero does not raise an error: it yields -Inf, NaN and Inf.
c(-1, 0, 1) / 0
## [1] -Inf  NaN  Inf
# character
typeof("hello")
## [1] "character"
# missing values
# NA_integer_, NA_real_ and NA_character_ are the typed missing-value
# constants; every one of them prints simply as NA.
NA
## [1] NA
NA_integer_
## [1] NA
NA_real_
## [1] NA
NA_character_
## [1] NA

Using atomic vectors

# Recycling: the scalar 10 is reused for every element of sample(10).
sample(10) + 10
##  [1] 18 13 17 15 11 20 16 14 19 12
# The shorter vector 1:2 is repeated five times to match the length of 1:10.
1:10 + 1:2
##  [1]  2  4  4  6  6  8  8 10 10 12
# 10 is not a multiple of 3, so recycling still happens but R warns about it.
1:10 + 1:3
## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
## length
##  [1]  2  4  6  5  7  9  8 10 12 11
# data.frame() recycles column b to fill all 10 rows.
data.frame(a = 1:10, b = 1:2)
##     a b
## 1   1 1
## 2   2 2
## 3   3 1
## 4   4 2
## 5   5 1
## 6   6 2
## 7   7 1
## 8   8 2
## 9   9 1
## 10 10 2
# NOTE(review): left commented out, presumably because data.frame() rejects
# column lengths that do not divide evenly (10 vs 3) -- confirm.
# data.frame(a = 1:10, b = 1:3)
# Subsetting: x is a random permutation of 1..10.
x <- sample(10)
x
##  [1]  3  6  2  9  5  4  7 10  8  1
# Numeric index selects by position; in this particular draw positions 5 and 7
# happen to hold the values 5 and 7.
x[c(5, 7)]
## [1] 5 7
# Logical index keeps the elements for which the condition is TRUE.
x[x>5]
## [1]  6  9  7 10  8

Recursive vectors

# A list can mix types and can contain other lists (element d).
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
a
## $a
## [1] 1 2 3
## 
## $b
## [1] "a string"
## 
## $c
## [1] 3.141593
## 
## $d
## $d[[1]]
## [1] -1
## 
## $d[[2]]
## [1] -5
# Single bracket keeps the list container: the result is a two-element list.
a[1:2]
## $a
## [1] 1 2 3
## 
## $b
## [1] "a string"
# Double bracket extracts one element; here that element is itself a list.
a[[4]]
## [[1]]
## [1] -1
## 
## [[2]]
## [1] -5
# Single bracket on the inner list still returns a (one-element) list ...
a[[4]][2]
## [[1]]
## [1] -5
# ... while double bracket returns the bare value.
a[[4]][[2]]
## [1] -5

Attributes

x <- 1:10
# Reading an attribute that has not been set returns NULL.
attr(x, "greeting")
## NULL
# attr<- attaches arbitrary named metadata to the vector.
attr(x, "greeting") <- "Hi!"
attr(x, "farewell") <- "Bye!"
# attributes() returns every attribute as a named list.
attributes(x)
## $greeting
## [1] "Hi!"
## 
## $farewell
## [1] "Bye!"

Augmented Vectors

# Factor: stored as an integer vector plus "levels" and "class" attributes.
# The unused level "ef" is still kept in the levels attribute.
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)
## [1] "integer"
attributes(x)
## $levels
## [1] "ab" "cd" "ef"
## 
## $class
## [1] "factor"
# Date: a double with class "Date"; unclass() shows the underlying number,
# 365 for 1971-01-01, i.e. a count of days from 1970-01-01.
x <- as.Date("1971-01-01")
unclass(x)
## [1] 365
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
# Date-time (POSIXct): a double with a "tzone" attribute; 01:00 UTC on
# 1970-01-01 is stored as 3600, i.e. a count of seconds.
x <- lubridate::ymd_hm("1970-01-01 01:00")
unclass(x)
## [1] 3600
## attr(,"tzone")
## [1] "UTC"
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "POSIXct" "POSIXt" 
## 
## $tzone
## [1] "UTC"
# Only the tzone attribute is reassigned below; the printed clock time
# changes to match the new time zone.
attr(x, "tzone") <- "US/Pacific"
x
## [1] "1969-12-31 17:00:00 PST"
attr(x, "tzone") <- "US/Eastern"
x
## [1] "1969-12-31 20:00:00 EST"

Ch21 Iteration

Introduction

For loops

# example from the cheatsheet
# Iterate over 1..4, add 10 to the loop variable and print each result.
# print() is needed because values are not auto-printed inside a loop body.
for (i in 1:4){
  j <- i + 10
  print(j)
}
## [1] 11
## [1] 12
## [1] 13
## [1] 14
# example 1: numeric calculation - add 10
x <- 11:15

# Walk over the positions of x (seq_along is safe for empty vectors) and
# print each element plus 10 without storing it.
for (i in seq_along(x)) {
  j <- x[i] + 10
  print(j)
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# save output
# Preallocate the result before the loop. x[i] + 10 is a double (10 is a
# double literal), so the container is declared "double"; an "integer"
# container would be silently coerced on the first assignment.
y <- vector("double", length(x))

for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# example 2: string operation - extract first letter
# NOTE(review): %>% and str_extract() are not defined in this view --
# presumably magrittr/stringr (tidyverse) are attached elsewhere; confirm.
x <- c("abc", "xyz")

# Preallocate a character vector with one slot per input string.
y <- vector("character", length(x))

# str_extract() with the pattern "[a-z]" keeps the first match in each string,
# which for these inputs is the first letter.
for (i in seq_along(x)){
  y[i] <- x[i] %>% str_extract("[a-z]")
  print(y[i])
}
## [1] "a"
## [1] "x"
# output
y
## [1] "a" "x"

For loop variations

# modifying an existing object
# Four columns of 10 random normal draws each; values differ on every run
# because no seed is set in the visible code.
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Rescale a numeric vector linearly so that its smallest finite value maps
# to 0 and its largest finite value maps to 1.
#
# x: a numeric vector; NA values are ignored when finding the range and stay
#    NA in the result.
# Returns a vector the same length as x. If all finite values are equal the
# denominator is zero and those positions come back as NaN.
rescale01 <- function(x) {
  # finite = TRUE leaves -Inf/Inf (and NA/NaN) out of the range, so a single
  # infinite value no longer collapses every other result to 0 or NaN.
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}

# seq_along(df) iterates over the column positions of df; each column is
# read with [[i]], rescaled, and written back in place.
for (i in seq_along(df)) {
  df[[i]] <- rescale01(df[[i]])
}
# After the loop every column has a minimum of 0 and a maximum of 1.
df
## # A tibble: 10 × 4
##         a     b     c      d
##     <dbl> <dbl> <dbl>  <dbl>
##  1 0.350  0.212 0     1     
##  2 0      0.592 0.127 0     
##  3 0.867  0.265 0.194 0.0185
##  4 0.916  0.969 1     0.592 
##  5 0.281  0     0.341 0.711 
##  6 0.785  0.875 0.888 0.270 
##  7 1      0.501 0.919 0.485 
##  8 0.776  1     0.593 0.785 
##  9 0.0779 0.697 0.324 0.535 
## 10 0.545  0.235 0.649 0.332

For loops vs functionals

# Fresh tibble of random normal draws (replaces the df used earlier).
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# for loop
# Preallocate one double per column, then fill in each column's mean.
output <- vector("double", length(df))
for (i in seq_along(df)) {
  output[[i]] <- mean(df[[i]])
}
output
## [1]  0.049788937 -0.008714384 -0.342289224 -0.142325754
# functional approach
# Apply one summary function to every column of a data frame.
#
# df:  a data frame (or list); its elements are visited in order.
# fun: a function called once per column; it is expected to return a single
#      number, which is stored in the corresponding slot of the result.
# ...: further arguments forwarded unchanged to every call of fun, for
#      example na.rm = TRUE.
# Returns a vector with one element per column of df (a double vector when
# fun returns numbers; length zero when df has no columns).
col_summary <- function(df, fun, ...) {
  # Preallocate so the result is not grown inside the loop.
  out <- vector("double", length(df))
  for (i in seq_along(df)) {
    out[i] <- fun(df[[i]], ...)
  }
  out
}

# Passing the function as an argument: the same helper yields column means
# (matching the for-loop result above) or column medians.
col_summary(df, mean)
## [1]  0.049788937 -0.008714384 -0.342289224 -0.142325754
col_summary(df, median)
## [1]  0.079361537 -0.234297005 -0.626088530 -0.004532156

The map functions

# example 1: numeric calculation - add 10
x <- 11:15

# Preallocate the result before the loop. x[i] + 10 is a double (10 is a
# double literal), so the container is declared "double"; an "integer"
# container would be silently coerced on the first assignment.
y <- vector("double", length(x))

for (i in seq_along(x)) {
  y[i] <- x[i] + 10
  print(y[i])
}
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
# output
y
## [1] 21 22 23 24 25
# using map function
x
## [1] 11 12 13 14 15
# map() applies the formula (~ .x + 10, where .x is the current element) to
# each element of x and returns the results as a list.
map(.x = x, .f = ~.x + 10)
## [[1]]
## [1] 21
## 
## [[2]]
## [1] 22
## 
## [[3]]
## [1] 23
## 
## [[4]]
## [1] 24
## 
## [[5]]
## [1] 25
# map_dbl() does the same work but returns a plain numeric vector.
map_dbl(.x = x, .f = ~.x + 10)
## [1] 21 22 23 24 25
# Add 10 to x (works element-wise on a vector) and return the result.
add_10 <- function(x) {
  x + 10
}
# Piping a single value into the named function ...
11 %>% add_10()
## [1] 21
# ... and passing the same function to map_dbl() instead of a formula.
map_dbl(.x = x, .f = add_10)
## [1] 21 22 23 24 25

Dealing with failure

# safely()
# safely(log) wraps log so that a call never stops with an error; it returns
# a list with two slots, `result` and `error`, one of which is NULL.
safe_log <- safely(log)
str(safe_log(10))
## List of 2
##  $ result: num 2.3
##  $ error : NULL
# On failure `result` is NULL and `error` holds the captured error object.
str(safe_log("a"))
## List of 2
##  $ result: NULL
##  $ error :List of 2
##   ..$ message: chr "non-numeric argument to mathematical function"
##   ..$ call   : language .f(...)
##   ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# using safely() with map
# NOTE(review): y is never printed here; transpose() presumably regroups the
# per-element result/error pairs into one list of results and one list of
# errors -- confirm by inspecting str(y).
x <- list(1, 10, "a")
y <- x %>% map(safely(log)) %>% transpose()

# possibly() - returns default value on error
# The failing element "a" yields the supplied default NA_real_ instead of
# stopping map_dbl().
x %>% map_dbl(possibly(log, NA_real_))
## [1] 0.000000 2.302585       NA