Week15_Assignment

21-21.2

1. Write for loops to:

Compute the mean of every column in mtcars. Determine the type of each column in nycflights13::flights. Compute the number of unique values in each column of iris. Generate 10 random normals from distributions with means of -10, 0, 10, and 100. Think about the output, sequence, and body before you start writing the loop.

# Mean of every column in mtcars, collected in a preallocated named vector.
output <- setNames(numeric(length(mtcars)), colnames(mtcars))
for (col in seq_along(mtcars)) {
  # Fill by position; the names were attached when the vector was created.
  output[[col]] <- mean(mtcars[[col]])
}
output

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Determine the type of each column in nycflights13::flights.
# class() is used, so a column may report more than one label (e.g. time_hour).
flights_tbl <- nycflights13::flights
output <- setNames(vector("list", ncol(flights_tbl)), names(flights_tbl))
for (col in seq_along(flights_tbl)) {
  output[[col]] <- class(flights_tbl[[col]])
}
output

## $year
## [1] "integer"
## 
## $month
## [1] "integer"
## 
## $day
## [1] "integer"
## 
## $dep_time
## [1] "integer"
## 
## $sched_dep_time
## [1] "integer"
## 
## $dep_delay
## [1] "numeric"
## 
## $arr_time
## [1] "integer"
## 
## $sched_arr_time
## [1] "integer"
## 
## $arr_delay
## [1] "numeric"
## 
## $carrier
## [1] "character"
## 
## $flight
## [1] "integer"
## 
## $tailnum
## [1] "character"
## 
## $origin
## [1] "character"
## 
## $dest
## [1] "character"
## 
## $air_time
## [1] "numeric"
## 
## $distance
## [1] "numeric"
## 
## $hour
## [1] "numeric"
## 
## $minute
## [1] "numeric"
## 
## $time_hour
## [1] "POSIXct" "POSIXt"

# Number of unique values in each column of the iris dataset.
data("iris")
iris_uniq <- setNames(numeric(ncol(iris)), names(iris))
for (col in seq_along(iris)) {
  # Fill by position; the names were attached when the vector was created.
  iris_uniq[[col]] <- n_distinct(iris[[col]])
}
iris_uniq

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

# Generate 10 random normals for each of the means -10, 0, 10, and 100.
n <- 10                    # number of draws per distribution
mu <- c(-10, 0, 10, 100)   # mean of each distribution
normals <- vector("list", length(mu))
for (k in seq_along(mu)) {
  # One vector of n draws per mean, stored at the matching position.
  normals[[k]] <- rnorm(n, mean = mu[[k]])
}
normals

## [[1]]
##  [1]  -9.362650  -8.573368 -11.226029  -9.538742  -8.255746 -10.287863
##  [7]  -9.216611  -8.705565  -9.985233  -8.570075
## 
## [[2]]
##  [1]  0.45947486 -1.92192129 -1.06353937  0.04164079 -0.52661745 -2.86267927
##  [7] -3.38164360  0.58288051 -0.57724189  0.82807380
## 
## [[3]]
##  [1]  7.352514 10.801937  8.972469  9.272128 12.374769 12.561484 12.047757
##  [8] 10.993696  9.784919  9.738239
## 
## [[4]]
##  [1] 101.40032 100.58267 100.16035 100.48140  98.81068 100.12595 101.70835
##  [8]  99.98700 100.84064 100.65370

2. Eliminate the for loop in each of the following examples by taking advantage of an existing function that works with vectors:

# Loop from the exercise: appends each element of `letters` to `out`, one at a
# time.
out <- ""
for (x in letters) {
  out <- str_c(out, x)
}
out

## [1] "abcdefghijklmnopqrstuvwxyz"

# str_c() already works with vectors, so passing `collapse = ""` joins all the
# letters into a single string without a loop.
str_c(letters, collapse = "")

## [1] "abcdefghijklmnopqrstuvwxyz"

# Loop from the exercise. The accumulator is named `sd.` rather than `sd`
# because sd() is the name of the function used below.
x <- sample(100)
sd. <- 0
for (i in seq_along(x)) {
  # Accumulate the squared deviation of each element from the mean of x.
  sd. <- sd. + (x[i] - mean(x))^2
}
# Divide by (n - 1) and take the square root.
sd. <- sqrt(sd. / (length(x) - 1))
sd.

## [1] 29.01149

# sd() gives the same result without a loop.
sd(x)

## [1] 29.01149

# If the equation has to be written out (e.g. for pedagogical reasons), mean()
# and sum() already work with vectors:
sqrt(sum((x - mean(x))^2) / (length(x) - 1))

## [1] 29.01149

# Loop from the exercise: out[i] holds the sum of x[1] through x[i].
x <- runif(100)
out <- vector("numeric", length(x))
out[1] <- x[1]
for (i in 2:length(x)) {
  # Add the current element to the running total stored one position back.
  out[i] <- out[i - 1] + x[i]
}
out

##   [1]  0.2220207  1.1064603  2.0850930  3.0317567  3.2536607  3.7253930
##   [7]  4.1686873  4.4846534  4.5199752  5.0170286  5.9873385  6.1714072
##  [13]  6.2634643  6.9980566  7.5283855  8.1173823  8.6651636  8.6667436
##  [19]  9.1193945 10.0135016 10.4082420 10.5968175 10.8544551 11.7685711
##  [25] 12.3306611 12.8039961 12.8954831 13.5097802 14.1533559 14.7196509
##  [31] 15.2892737 15.4006678 15.5730110 16.2639726 16.8231554 17.3737846
##  [37] 17.4941810 17.9804321 18.5313915 18.7302216 19.1932212 19.7526749
##  [43] 20.0693144 20.7702757 20.8305089 21.5744062 21.8087740 22.6899235
##  [49] 22.8368454 23.7550606 24.3633616 24.6578658 25.3916688 26.2611700
##  [55] 27.0853523 27.3019543 27.4776455 28.4237874 28.6003562 28.9295117
##  [61] 29.3025706 29.9568406 30.2616407 30.7023676 31.2409363 31.6633562
##  [67] 31.8448399 32.6984426 33.1713291 33.2474547 33.8143053 34.0775934
##  [73] 34.5026939 35.1973582 35.8158497 36.4898773 37.0546360 37.8474265
##  [79] 38.8140253 39.1975207 39.8491229 39.9884701 40.1548139 40.3786086
##  [85] 41.3626440 42.2568662 43.1867712 43.2770007 43.4591340 43.5715789
##  [91] 44.0854396 44.5524097 45.3262457 45.3866048 45.4857734 46.4251541
##  [97] 47.1101528 47.6926799 47.7804284 48.5773461

# The loop above computes a cumulative sum, which cumsum() does directly;
# all.equal() confirms that both approaches agree.
all.equal(cumsum(x), out)

## [1] TRUE

3. Combine your function writing and for loop skills:

Write a for loop that prints() the lyrics to the children’s song “Alice the camel”. Convert the nursery rhyme “ten in the bed” to a function. Generalize it to any number of people in any sleeping structure. Convert the song “99 bottles of beer on the wall” to a function. Generalize to any number of any vessel containing any liquid on surface.

# Alice the Camel

# Build the lines of one verse for a hump count given as a word ("five", ...,
# "one", "no"). Returns a character vector: the count line three times,
# followed by the closing line of the verse.
alice_verse <- function(count) {
  # "one hump" is singular; every other count, including "no", is plural.
  noun <- if (count == "one") "hump" else "humps"
  closing <- if (count == "no") {
    "Now Alice is a horse."
  } else {
    "So go, Alice, go."
  }
  c(rep(paste0("Alice the camel has ", count, " ", noun, "."), 3), closing)
}

humps <- c("five", "four", "three", "two", "one", "no")
for (i in humps) {
  # writeLines() ends every line with a bare newline (no trailing space); the
  # empty string adds the blank line that separates the verses.
  writeLines(c(alice_verse(i), ""))
}

## Alice the camel has five humps.
## Alice the camel has five humps.
## Alice the camel has five humps.
## So go, Alice, go.
## 
## Alice the camel has four humps.
## Alice the camel has four humps.
## Alice the camel has four humps.
## So go, Alice, go.
## 
## Alice the camel has three humps.
## Alice the camel has three humps.
## Alice the camel has three humps.
## So go, Alice, go.
## 
## Alice the camel has two humps.
## Alice the camel has two humps.
## Alice the camel has two humps.
## So go, Alice, go.
## 
## Alice the camel has one hump.
## Alice the camel has one hump.
## Alice the camel has one hump.
## So go, Alice, go.
## 
## Alice the camel has no humps.
## Alice the camel has no humps.
## Alice the camel has no humps.
## Now Alice is a horse.

# Ten in the Bed

# Print the rhyme for `n` sleepers in any sleeping `structure`.
#   n         - starting number of sleepers; a single whole number >= 1.
#   structure - name of the sleeping structure, e.g. "bed" or "hammock".
# Counts from one to ten are spelled out; larger counts are printed as digits.
# The lyrics are written to the console; NULL is returned invisibly.
in_the_bed <- function(n = 10, structure = "bed") {
  stopifnot(
    is.numeric(n), length(n) == 1L, is.finite(n), n >= 1, n == round(n),
    is.character(structure), length(structure) == 1L
  )
  words <- c(
    "one", "two", "three", "four", "five",
    "six", "seven", "eight", "nine", "ten"
  )
  for (k in seq(n, 1)) {
    count <- if (k <= length(words)) words[[k]] else as.character(k)
    # A single sleeper takes the singular verb.
    verb <- if (k == 1) "was" else "were"
    cat("There ", verb, " ", count, " in the ", structure, "\n", sep = "")
    cat("and the little one said\n")
    if (k == 1) {
      cat("I'm lonely...\n")
    } else {
      cat("Roll over, roll over\n")
      cat("So they all rolled over and one fell out.\n")
      # Blank line between verses.
      cat("\n")
    }
  }
  invisible(NULL)
}
in_the_bed()

## There were ten in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were nine in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were eight in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were seven in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were six in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were five in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were four in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were three in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There were two in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
## 
## There was one in the bed
## and the little one said
## I'm lonely...

# Ninety-Nine Bottles of Beer on the Wall

# Describe a count of vessels: "3 bottles", "1 bottle" or "no more bottles".
#   n      - number of vessels remaining.
#   vessel - singular name of the vessel; the plural is formed by appending
#            "s", so irregular plurals (e.g. "glass") are not handled.
bottles <- function(n, vessel = "bottle") {
  if (n > 1) {
    paste0(n, " ", vessel, "s")
  } else if (n == 1) {
    paste0("1 ", vessel)
  } else {
    paste0("no more ", vessel, "s")
  }
}

# Print the song, counting down from `total_bottles` to zero.
#   total_bottles - starting number of vessels; a single whole number >= 0.
#   vessel        - singular name of the vessel (default "bottle").
#   liquid        - what the vessels contain (default "beer").
#   surface       - what the vessels stand on (default "wall").
# The defaults reproduce the classic lyrics, so beer_bottles(3) prints the
# same text as before the extra arguments existed.
beer_bottles <- function(total_bottles, vessel = "bottle", liquid = "beer",
                         surface = "wall") {
  stopifnot(
    is.numeric(total_bottles), length(total_bottles) == 1L,
    is.finite(total_bottles), total_bottles >= 0,
    total_bottles == round(total_bottles)
  )
  # Upper-case only the first character so that capitals inside `liquid`,
  # `vessel` or `surface` are left exactly as supplied.
  capitalize <- function(s) {
    paste0(toupper(substring(s, 1, 1)), substring(s, 2))
  }
  on_surface <- paste0(" of ", liquid, " on the ", surface)
  for (current_bottles in seq(total_bottles, 0)) {
    count_now <- bottles(current_bottles, vessel)
    # first line
    cat(
      capitalize(count_now), on_surface, ", ",
      count_now, " of ", liquid, ".\n",
      sep = ""
    )
    # second line
    if (current_bottles > 0) {
      cat(
        "Take one down and pass it around, ",
        bottles(current_bottles - 1, vessel), on_surface, ".\n",
        sep = ""
      )
    } else {
      # Nothing left: restock to the starting amount.
      cat(
        "Go to the store and buy some more, ",
        bottles(total_bottles, vessel), on_surface, ".\n",
        sep = ""
      )
    }
    cat("\n")
  }
}
beer_bottles(3)

## 3 bottles of beer on the wall, 3 bottles of beer.
## Take one down and pass it around, 2 bottles of beer on the wall.
## 
## 2 bottles of beer on the wall, 2 bottles of beer.
## Take one down and pass it around, 1 bottle of beer on the wall.
## 
## 1 bottle of beer on the wall, 1 bottle of beer.
## Take one down and pass it around, no more bottles of beer on the wall.
## 
## No more bottles of beer on the wall, no more bottles of beer.
## Go to the store and buy some more, 3 bottles of beer on the wall.

21.3

2. What happens if you use for (nm in names(x)) and x has no names? What if only some of the elements are named? What if the names are not unique?

# No names at all: names(x) is NULL, so the loop below has nothing to iterate
# over and its body never runs.
x <- c(11, 12, 13)
print(names(x))

## NULL

#> NULL
for (nm in names(x)) {
  print(nm)
  print(x[[nm]])
}

# If only some of the elements are named, names(x) holds "" for the unnamed
# ones, and x[[""]] fails with a "subscript out of bounds" error.
x <- c(a = 11, 12, c = 13)
names(x)

## [1] "a" ""  "c"

for (nm in names(x)) {
  print(nm)
  print(x[[nm]])
}

## [1] "a"
## [1] 11
## [1] ""

## Error in x[[nm]]: subscript out of bounds

# If the vector contains duplicate names, x[[nm]] returns the first element
# with that name, so the second "a" (12) is never reached.
x <- c(a = 11, a = 12, c = 13)
names(x)

## [1] "a" "a" "c"

for (nm in names(x)) {
  print(nm)
  print(x[[nm]])
}

## [1] "a"
## [1] 11
## [1] "a"
## [1] 11
## [1] "c"
## [1] 13

3. Write a function that prints the mean of each numeric column in a data frame, along with its name. For example, show_mean(iris) would print:

# Print the mean of each numeric column of a data frame next to its name.
#   df     - a data frame; non-numeric columns are skipped.
#   digits - passed to format() as both `digits` and `nsmall`.
# Labels are padded to the widest *printed* label so the values line up.
# Prints nothing when there are no numeric columns. Returns NULL invisibly.
show_mean <- function(df, digits = 2) {
  numeric_idx <- unname(which(vapply(df, is.numeric, logical(1))))
  if (length(numeric_idx) == 0L) {
    return(invisible(NULL))
  }
  # format() on a character vector left-justifies and pads to a common width.
  labels <- format(paste0(names(df)[numeric_idx], ":"))
  for (k in seq_along(numeric_idx)) {
    avg <- mean(df[[numeric_idx[k]]])
    # sep = "" keeps a trailing space from being written before the newline.
    cat(
      labels[k], " ", format(avg, digits = digits, nsmall = digits), "\n",
      sep = ""
    )
  }
  invisible(NULL)
}
show_mean(iris)

## Sepal.Length: 5.84
## Sepal.Width:  3.06
## Petal.Length: 3.76
## Petal.Width:  1.20

21.5

1. Write code that uses one of the map functions to:

# Mean of every column in mtcars: apply mean() to each column with map_dbl(),
# since the results are numeric.
map_dbl(mtcars, mean)

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Type of every column in nycflights13::flights: apply typeof() (discussed in
# the section on Vector basics) with map_chr(), since the results are character.
map_chr(nycflights13::flights, typeof)

##           year          month            day       dep_time sched_dep_time 
##      "integer"      "integer"      "integer"      "integer"      "integer" 
##      dep_delay       arr_time sched_arr_time      arr_delay        carrier 
##       "double"      "integer"      "integer"       "double"    "character" 
##         flight        tailnum         origin           dest       air_time 
##      "integer"    "character"    "character"    "character"       "double" 
##       distance           hour         minute      time_hour 
##       "double"       "double"       "double"       "double"

# Number of unique values in each column of iris: n_distinct() calculates the
# number of unique values in a vector.
map_int(iris, n_distinct)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

# The same counts returned through map_dbl().
map_dbl(iris, n_distinct)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

# Equivalent without n_distinct(): count the elements of unique(x).
map_int(iris, function(x) length(unique(x)))

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

# Same computation written with purrr's formula shorthand (the previous call
# spells out the anonymous function).
map_int(iris, ~ length(unique(.x)))

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##           35           23           43           22            3

# Generate 10 random normals for each of the means -10, 0, 10, and 100.
map(c(-10, 0, 10, 100), function(mu) rnorm(n = 10, mean = mu))

## [[1]]
##  [1] -10.040509  -8.377949 -10.661548 -10.425124 -10.634487 -11.219784
##  [7] -10.928110 -10.017908 -10.469457 -10.178721
## 
## [[2]]
##  [1]  1.8834881 -2.4043614  0.2893253  0.8607293 -0.3409145 -1.1159858
##  [7]  1.5947398  0.9129872  0.1505654 -1.6111096
## 
## [[3]]
##  [1]  9.910664 10.192248  7.860912 12.691659  9.613689 10.435574 10.763063
##  [8]  8.677833  9.136049  9.317961
## 
## [[4]]
##  [1] 100.56351 101.74481  98.31660  96.89202 100.01892 102.57980 100.04675
##  [8] 100.41889  98.88398 101.32893

4. What does map(-2:2, rnorm, n = 5) do? Why?

What does map_dbl(-2:2, rnorm, n = 5) do? Why?

# Each element of -2:2 is passed to rnorm() as its first unnamed argument;
# because `n = 5` is supplied by name, that argument is `mean`. The result is
# a list of five vectors, each holding 5 draws.
map(-2:2, rnorm, n = 5)

## [[1]]
## [1] -2.8280861 -0.2764502 -2.2690586 -3.1893446 -1.0048463
## 
## [[2]]
## [1] -0.08235484 -2.14569398 -2.79742729 -1.29668012  1.47352741
## 
## [[3]]
## [1] -0.82558960  0.26163935 -0.92619358 -0.03439889 -1.50698776
## 
## [[4]]
## [1] 0.9743125 0.7518809 1.1481643 0.9519482 2.6241986
## 
## [[5]]
## [1] 1.0090739 1.7840416 0.4703622 1.0096348 0.7561794

# map_dbl() expects a single number per element (per the purrr docs), while
# each rnorm() call here returns 5 values, so map_dbl(-2:2, rnorm, n = 5) is
# expected to fail -- TODO confirm by running it. Flattening the list from
# map() gives one numeric vector of 25 draws instead.
map(-2:2, rnorm, n = 5) %>%
  flatten_dbl()

##  [1] -0.6174964 -1.2030087 -2.1791177 -2.8232231 -1.8997098 -1.1687931
##  [7] -2.5292588 -0.3670285 -1.5292841 -0.2286154  0.3447048 -0.1286533
## [13]  0.2121289  1.0923856  0.2097538  2.5108405  0.2947574  0.8935331
## [19]  0.7898775  2.4039278  1.0823736  1.5407692  0.3419537  0.6421964
## [25]  1.1420430