Ch. 20

Intro

Vector basics

typeof(letters)

## [1] "character"

#> [1] "character"
typeof(1:10)

## [1] "integer"

#> [1] "integer"

x <- list("a", "b", 1:10)
length(x)

## [1] 3

#> [1] 3

Important types of atomic vectors

1:10 %% 3 == 0

##  [1] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE

#>  [1] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE

c(TRUE, TRUE, FALSE, NA)

## [1]  TRUE  TRUE FALSE    NA

#> [1]  TRUE  TRUE FALSE    NA

typeof(1)

## [1] "double"

#> [1] "double"
typeof(1L)

## [1] "integer"

#> [1] "integer"
1.5L

## [1] 1.5

#> [1] 1.5

x <- sqrt(2) ^ 2
x

## [1] 2

#> [1] 2
x - 2

## [1] 4.440892e-16

#> [1] 4.440892e-16

c(-1, 0, 1) / 0

## [1] -Inf  NaN  Inf

#> [1] -Inf  NaN  Inf

NA            # logical

## [1] NA

#> [1] NA
NA_integer_   # integer

## [1] NA

#> [1] NA
NA_real_      # double

## [1] NA

#> [1] NA
NA_character_ # character

## [1] NA

#> [1] NA

Using atomic vectors

x <- sample(20, 100, replace = TRUE)
y <- x > 10
sum(y)  # how many are greater than 10?

## [1] 42

#> [1] 38
mean(y) # what proportion are greater than 10?

## [1] 0.42

#> [1] 0.38

# Compare the length explicitly instead of relying on the implicit
# integer-to-logical coercion that `if (length(x))` performs.
if (length(x) > 0) {
  # do something
}

## NULL

typeof(c(TRUE, 1L))

## [1] "integer"

#> [1] "integer"
typeof(c(1L, 1.5))

## [1] "double"

#> [1] "double"
typeof(c(1.5, "a"))

## [1] "character"

#> [1] "character"

sample(10) + 100

##  [1] 106 102 108 107 110 101 105 109 103 104

#>  [1] 107 104 103 109 102 101 106 110 105 108
runif(10) > 0.5

##  [1]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE

#>  [1] FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

1:10 + 1:2

##  [1]  2  4  4  6  6  8  8 10 10 12

#>  [1]  2  4  4  6  6  8  8 10 10 12

1:10 + 1:3

## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
## length

##  [1]  2  4  6  5  7  9  8 10 12 11

#> Warning in 1:10 + 1:3: longer object length is not a multiple of shorter object
#> length
#>  [1]  2  4  6  5  7  9  8 10 12 11

c(x = 1, y = 2, z = 4)

## x y z 
## 1 2 4

#> x y z 
#> 1 2 4

set_names(1:3, c("a", "b", "c"))

## a b c 
## 1 2 3

#> a b c 
#> 1 2 3

x <- c(abc = 1, def = 2, xyz = 5)
x[c("xyz", "def")]

## xyz def 
##   5   2

#> xyz def 
#>   5   2

Recursive vectors

# `x` still holds the named double vector from the subsetting example, so build
# the list explicitly before inspecting its structure.
x <- list(1, 2, 3)
str(x)

## List of 3
##  $ : num 1
##  $ : num 2
##  $ : num 3

#> List of 3
#>  $ : num 1
#>  $ : num 2
#>  $ : num 3

x_named <- list(a = 1, b = 2, c = 3)
str(x_named)

## List of 3
##  $ a: num 1
##  $ b: num 2
##  $ c: num 3

#> List of 3
#>  $ a: num 1
#>  $ b: num 2
#>  $ c: num 3

y <- list("a", 1L, 1.5, TRUE)
str(y)

## List of 4
##  $ : chr "a"
##  $ : int 1
##  $ : num 1.5
##  $ : logi TRUE

#> List of 4
#>  $ : chr "a"
#>  $ : int 1
#>  $ : num 1.5
#>  $ : logi TRUE

x1 <- list(c(1, 2), c(3, 4))
x2 <- list(list(1, 2), list(3, 4))
x3 <- list(1, list(2, list(3)))

a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))

Attributes

x <- 1:10
attr(x, "greeting")

## NULL

#> NULL
attr(x, "greeting") <- "Hi!"
attr(x, "farewell") <- "Bye!"
attributes(x)

## $greeting
## [1] "Hi!"
## 
## $farewell
## [1] "Bye!"

#> $greeting
#> [1] "Hi!"
#> 
#> $farewell
#> [1] "Bye!"

Augmented vectors

x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)

## [1] "integer"

#> [1] "integer"
attributes(x)

## $levels
## [1] "ab" "cd" "ef"
## 
## $class
## [1] "factor"

#> $levels
#> [1] "ab" "cd" "ef"
#> 
#> $class
#> [1] "factor"

tb <- tibble::tibble(x = 1:5, y = 5:1)
typeof(tb)

## [1] "list"

#> [1] "list"
attributes(tb)

## $class
## [1] "tbl_df"     "tbl"        "data.frame"
## 
## $row.names
## [1] 1 2 3 4 5
## 
## $names
## [1] "x" "y"

#> $class
#> [1] "tbl_df"     "tbl"        "data.frame"
#> 
#> $row.names
#> [1] 1 2 3 4 5
#> 
#> $names
#> [1] "x" "y"

Ch. 21

For Loops

df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Median of every column in `df`, collected with an explicit loop.
output <- double(ncol(df))          # allocate one slot per column up front
for (i in seq_len(ncol(df))) {      # visit each column position in turn
  output[[i]] <- median(df[[i]])    # store that column's median
}
output

## [1] -0.6256641 -0.6144015 -0.3892650 -0.3625807

#> [1] -0.24576245 -0.28730721 -0.05669771  0.14426335

df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
#' Rescale a numeric vector to the interval [0, 1]
#'
#' Subtracts the smallest finite value and divides by the finite range, so the
#' finite minimum maps to 0 and the finite maximum maps to 1.
#'
#' @param x A numeric vector. `NA` values are ignored when the range is
#'   computed and remain `NA` in the result.
#' @return A numeric vector the same length as `x`. `-Inf` and `Inf` stay
#'   infinite. If all finite values are equal the range is zero and the finite
#'   entries come back as `NaN`.
rescale01 <- function(x) {
  # finite = TRUE keeps -Inf/Inf out of the range; otherwise one infinite value
  # makes the denominator infinite and the finite entries lose their scaling.
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}

# Rescale columns a-d of `df` in place, one column at a time.
for (col in c("a", "b", "c", "d")) {
  df[[col]] <- rescale01(df[[col]])
}

# Simulate one coin toss: returns either "T" or "H", chosen at random.
flip <- function() {
  faces <- c("T", "H")
  sample(faces, size = 1)
}

# Toss the coin until three heads come up in a row, counting every toss.
flips <- 0
nheads <- 0

repeat {
  # Stop as soon as the current streak reaches three heads.
  if (nheads >= 3) {
    break
  }
  # A head extends the streak; anything else resets it.
  nheads <- if (flip() == "H") nheads + 1 else 0
  flips <- flips + 1
}
flips

## [1] 12

#> [1] 21

For loops vs functionals

df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Mean of every column in `df`, filled into a preallocated result vector.
output <- numeric(length(df))
for (i in seq_along(df)) {
  # `[[` extracts the i-th column as a plain vector before averaging it.
  output[[i]] <- mean(df[[i]])
}
output

## [1]  0.162536547  0.006615033 -0.267736445 -0.166334372

#> [1] -0.3260369  0.1356639  0.4291403 -0.2498034

#' Summarise every column of a data frame with one function
#'
#' Shared implementation behind `col_mean()`, `col_median()` and `col_sd()`,
#' which previously repeated the same loop three times.
#'
#' @param df A data frame (or list) of columns.
#' @param fun A function applied to each column; it must return one number.
#' @return An unnamed double vector with one element per column of `df`.
col_summary <- function(df, fun) {
  # Preallocate so the loop fills slots instead of growing the vector.
  output <- vector("double", length(df))
  for (i in seq_along(df)) {
    output[[i]] <- fun(df[[i]])
  }
  output
}

#' Mean of each column
#'
#' @param df A data frame (or list) of numeric columns.
#' @return An unnamed double vector of column means.
col_mean <- function(df) {
  col_summary(df, mean)
}

#' Median of each column
#'
#' @param df A data frame (or list) of numeric columns.
#' @return An unnamed double vector of column medians.
col_median <- function(df) {
  col_summary(df, median)
}

#' Standard deviation of each column
#'
#' @param df A data frame (or list) of numeric columns.
#' @return An unnamed double vector of column standard deviations.
col_sd <- function(df) {
  col_summary(df, sd)
}

The map functions

map_dbl(df, mean)

##            a            b            c            d 
##  0.162536547  0.006615033 -0.267736445 -0.166334372

#>          a          b          c          d 
#> -0.3260369  0.1356639  0.4291403 -0.2498034
map_dbl(df, median)

##           a           b           c           d 
## -0.10213680  0.29603751 -0.18454087 -0.01080874

#>           a           b           c           d 
#> -0.51850298  0.02779864  0.17295591 -0.61163819
map_dbl(df, sd)

##         a         b         c         d 
## 1.3869881 1.2709210 0.8418680 0.6646794

#>         a         b         c         d 
#> 0.9214834 0.4848945 0.9816016 1.1563324

df %>% map_dbl(mean)

##            a            b            c            d 
##  0.162536547  0.006615033 -0.267736445 -0.166334372

#>          a          b          c          d 
#> -0.3260369  0.1356639  0.4291403 -0.2498034
df %>% map_dbl(median)

##           a           b           c           d 
## -0.10213680  0.29603751 -0.18454087 -0.01080874

#>           a           b           c           d 
#> -0.51850298  0.02779864  0.17295591 -0.61163819
df %>% map_dbl(sd)

##         a         b         c         d 
## 1.3869881 1.2709210 0.8418680 0.6646794

#>         a         b         c         d 
#> 0.9214834 0.4848945 0.9816016 1.1563324

# Split mtcars into one data frame per `cyl` value, then fit the linear model
# `mpg ~ wt` to each piece using an explicit anonymous function.
models <- mtcars %>% 
  split(.$cyl) %>% 
  map(function(df) lm(mpg ~ wt, data = df))

# Same per-`cyl` fit of `mpg ~ wt`, written with the one-sided formula
# shorthand instead of `function(df)`; inside the formula `.` stands for the
# current piece of the split (assuming `map` here is purrr's).
models <- mtcars %>% 
  split(.$cyl) %>% 
  map(~lm(mpg ~ wt, data = .))

models %>% 
  map(summary) %>% 
  map_dbl(~.$r.squared)

##         4         6         8 
## 0.5086326 0.4645102 0.4229655

#>         4         6         8 
#> 0.5086326 0.4645102 0.4229655

models %>% 
  map(summary) %>% 
  map_dbl("r.squared")

##         4         6         8 
## 0.5086326 0.4645102 0.4229655

#>         4         6         8 
#> 0.5086326 0.4645102 0.4229655

CodeAlong12

Josh Crosswhite

2025-04-24

Ch. 20

Intro

Vector basics

Important types of atomic vectors

Using atomic vectors

Recursive vectors

Attributes

Augmented vectors

Ch. 21

For Loops

For loops vs functionals

The map functions