Write for loops to: compute the mean of every column in mtcars; determine the type of each column in nycflights13::flights; compute the number of unique values in each column of iris; and generate 10 random normals from distributions with means of -10, 0, 10, and 100. Think about the output, sequence, and body before you start writing the loop.
# Mean of every column in mtcars: preallocate a named double vector with one
# slot per column, then fill the slots by position.
output <- setNames(numeric(ncol(mtcars)), names(mtcars))
for (col in seq_along(mtcars)) {
  output[[col]] <- mean(mtcars[[col]])
}
output
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Class of each column in nycflights13::flights. A column can carry more than
# one class (time_hour is both "POSIXct" and "POSIXt"), so the results are
# collected in a named list rather than a character vector.
output <- setNames(
  vector("list", ncol(nycflights13::flights)),
  names(nycflights13::flights)
)
for (col in seq_along(output)) {
  output[[col]] <- class(nycflights13::flights[[col]])
}
output
## $year
## [1] "integer"
##
## $month
## [1] "integer"
##
## $day
## [1] "integer"
##
## $dep_time
## [1] "integer"
##
## $sched_dep_time
## [1] "integer"
##
## $dep_delay
## [1] "numeric"
##
## $arr_time
## [1] "integer"
##
## $sched_arr_time
## [1] "integer"
##
## $arr_delay
## [1] "numeric"
##
## $carrier
## [1] "character"
##
## $flight
## [1] "integer"
##
## $tailnum
## [1] "character"
##
## $origin
## [1] "character"
##
## $dest
## [1] "character"
##
## $air_time
## [1] "numeric"
##
## $distance
## [1] "numeric"
##
## $hour
## [1] "numeric"
##
## $minute
## [1] "numeric"
##
## $time_hour
## [1] "POSIXct" "POSIXt"
# Number of distinct values in each column of the iris dataset, stored in a
# named numeric vector with one entry per column.
data("iris")
iris_uniq <- setNames(vector("double", ncol(iris)), names(iris))
for (col in seq_along(iris)) {
  iris_uniq[[col]] <- n_distinct(iris[[col]])
}
iris_uniq
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# Generate 10 random normals for each of the means -10, 0, 10 and 100.
# How many values to draw from each distribution.
n <- 10
# Mean of each distribution.
mu <- c(-10, 0, 10, 100)
# One list element per mean, filled in the same order as `mu`.
normals <- vector("list", length(mu))
for (i in seq_along(mu)) {
  normals[[i]] <- rnorm(n, mean = mu[[i]])
}
normals
## [[1]]
## [1] -9.362650 -8.573368 -11.226029 -9.538742 -8.255746 -10.287863
## [7] -9.216611 -8.705565 -9.985233 -8.570075
##
## [[2]]
## [1] 0.45947486 -1.92192129 -1.06353937 0.04164079 -0.52661745 -2.86267927
## [7] -3.38164360 0.58288051 -0.57724189 0.82807380
##
## [[3]]
## [1] 7.352514 10.801937 8.972469 9.272128 12.374769 12.561484 12.047757
## [8] 10.993696 9.784919 9.738239
##
## [[4]]
## [1] 101.40032 100.58267 100.16035 100.48140 98.81068 100.12595 101.70835
## [8] 99.98700 100.84064 100.65370
# Loop version: start from an empty string and append one element of the
# built-in `letters` vector per iteration.
out <- ""
for (x in letters) {
out <- str_c(out, x)
}
out
## [1] "abcdefghijklmnopqrstuvwxyz"
# Since str_c() already works with vectors, the loop can be replaced by a
# single str_c() call whose collapse argument joins the letters into one
# string.
str_c(letters, collapse = "")
## [1] "abcdefghijklmnopqrstuvwxyz"
# Standard deviation of x computed by hand. The accumulator is called `sd.`
# rather than `sd` so that it does not mask the sd() function used for the
# comparison below.
x <- sample(100)
# mean(x) does not change inside the loop, so compute it once instead of once
# per element.
x_mean <- mean(x)
sd. <- 0
for (i in seq_along(x)) {
  sd. <- sd. + (x[i] - x_mean)^2
}
sd. <- sqrt(sd. / (length(x) - 1))
sd.
## [1] 29.01149
# The built-in sd() gives the same answer without a loop.
sd(x)
## [1] 29.01149
# If the formula has to be spelled out (e.g. for pedagogical reasons), mean()
# and sum() already work with vectors, so no loop is needed:
sqrt(sum((x - mean(x))^2) / (length(x) - 1))
## [1] 29.01149
# Running total of x computed with an explicit loop: out[i] holds
# x[1] + ... + x[i].
x <- runif(100)
out <- vector("numeric", length(x))
out[1] <- x[1]
# seq_along(x)[-1] is 2, 3, ..., length(x). Unlike 2:length(x) it is empty,
# rather than counting down, when x has fewer than two elements.
for (i in seq_along(x)[-1]) {
  out[i] <- out[i - 1] + x[i]
}
out
## [1] 0.2220207 1.1064603 2.0850930 3.0317567 3.2536607 3.7253930
## [7] 4.1686873 4.4846534 4.5199752 5.0170286 5.9873385 6.1714072
## [13] 6.2634643 6.9980566 7.5283855 8.1173823 8.6651636 8.6667436
## [19] 9.1193945 10.0135016 10.4082420 10.5968175 10.8544551 11.7685711
## [25] 12.3306611 12.8039961 12.8954831 13.5097802 14.1533559 14.7196509
## [31] 15.2892737 15.4006678 15.5730110 16.2639726 16.8231554 17.3737846
## [37] 17.4941810 17.9804321 18.5313915 18.7302216 19.1932212 19.7526749
## [43] 20.0693144 20.7702757 20.8305089 21.5744062 21.8087740 22.6899235
## [49] 22.8368454 23.7550606 24.3633616 24.6578658 25.3916688 26.2611700
## [55] 27.0853523 27.3019543 27.4776455 28.4237874 28.6003562 28.9295117
## [61] 29.3025706 29.9568406 30.2616407 30.7023676 31.2409363 31.6633562
## [67] 31.8448399 32.6984426 33.1713291 33.2474547 33.8143053 34.0775934
## [73] 34.5026939 35.1973582 35.8158497 36.4898773 37.0546360 37.8474265
## [79] 38.8140253 39.1975207 39.8491229 39.9884701 40.1548139 40.3786086
## [85] 41.3626440 42.2568662 43.1867712 43.2770007 43.4591340 43.5715789
## [91] 44.0854396 44.5524097 45.3262457 45.3866048 45.4857734 46.4251541
## [97] 47.1101528 47.6926799 47.7804284 48.5773461
# The loop above calculates a cumulative sum, so it can be replaced by the
# vectorised cumsum(); all.equal() confirms that both give the same result.
all.equal(cumsum(x), out)
## [1] TRUE
Write a for loop that print()s the lyrics to the children’s song “Alice the camel”. Convert the nursery rhyme “ten in the bed” to a function. Generalize it to any number of people in any sleeping structure. Convert the song “99 bottles of beer on the wall” to a function. Generalize to any number of any vessel containing any liquid on any surface.
# Alice the Camel
# Each verse repeats the hump line three times and then adds a closing line.
humps <- c("five", "four", "three", "two", "one", "no")
for (i in humps) {
  # Only "one" takes the singular; every other count, including "no", is
  # plural.
  hump_word <- if (i == "one") "hump" else "humps"
  verse_line <- str_c("Alice the camel has ", i, " ", hump_word, ".\n")
  # sep = "" stops cat() from inserting its default space separator, which
  # would otherwise leave a stray space before a line break.
  cat(rep(verse_line, 3), sep = "")
  if (i == "no") {
    cat("Now Alice is a horse.\n")
  } else {
    cat("So go, Alice, go.\n")
  }
  cat("\n")
}
## Alice the camel has five humps.
## Alice the camel has five humps.
## Alice the camel has five humps.
## So go, Alice, go.
##
## Alice the camel has four humps.
## Alice the camel has four humps.
## Alice the camel has four humps.
## So go, Alice, go.
##
## Alice the camel has three humps.
## Alice the camel has three humps.
## Alice the camel has three humps.
## So go, Alice, go.
##
## Alice the camel has two humps.
## Alice the camel has two humps.
## Alice the camel has two humps.
## So go, Alice, go.
##
## Alice the camel has one hump.
## Alice the camel has one hump.
## Alice the camel has one hump.
## So go, Alice, go.
##
## Alice the camel has no humps.
## Alice the camel has no humps.
## Alice the camel has no humps.
## Now Alice is a horse.
# Ten in the Bed
# One verse per count, from ten down to one; the last verse ends differently.
numbers <- c(
  "ten", "nine", "eight", "seven", "six", "five",
  "four", "three", "two", "one"
)
for (i in numbers) {
  # A single sleeper takes the singular verb: "There was one in the bed".
  verb <- if (i == "one") "was" else "were"
  cat(str_c("There ", verb, " ", i, " in the bed\n"))
  cat("and the little one said\n")
  if (i == "one") {
    cat("I'm lonely...")
  } else {
    cat("Roll over, roll over\n")
    cat("So they all rolled over and one fell out.\n")
  }
  cat("\n")
}
## There were ten in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were nine in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were eight in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were seven in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were six in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were five in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were four in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were three in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There were two in the bed
## and the little one said
## Roll over, roll over
## So they all rolled over and one fell out.
##
## There was one in the bed
## and the little one said
## I'm lonely...
# Ninety-Nine Bottles of Beer on the Wall
# Phrase a bottle count for the lyrics: "<n> bottles" when n is greater than
# one, "1 bottle" when n is exactly one, and "no more bottles" otherwise.
bottles <- function(n) {
  if (n > 1) {
    return(str_c(n, " bottles"))
  }
  if (n == 1) {
    return("1 bottle")
  }
  "no more bottles"
}
# Print the lyrics of "99 Bottles of Beer on the Wall", counting down from
# `total_bottles` to zero, for any vessel, liquid and surface.
#
# Arguments:
#   total_bottles  Number of vessels to start with: a single non-negative
#                  whole number.
#   vessel         Singular name of the container (default "bottle").
#   liquid         What the vessels contain (default "beer").
#   surface        Where the vessels sit (default "wall").
#   vessel_plural  Plural of `vessel`. Defaults to `vessel` with "s" appended;
#                  override it for irregular plurals such as "glasses".
#
# Called for its printed output (via cat()); returns NULL invisibly. Only base
# R functions are used, so no package has to be attached.
beer_bottles <- function(total_bottles, vessel = "bottle", liquid = "beer",
                         surface = "wall",
                         vessel_plural = paste0(vessel, "s")) {
  # seq(total_bottles, 0) counts *up* from a negative start and never reaches
  # zero from a fractional one, so reject both.
  stopifnot(
    is.numeric(total_bottles),
    length(total_bottles) == 1L,
    !is.na(total_bottles),
    total_bottles >= 0,
    total_bottles == round(total_bottles)
  )

  # "<n> <vessels>", "1 <vessel>" or "no more <vessels>".
  count_phrase <- function(n) {
    if (n > 1) {
      paste0(n, " ", vessel_plural)
    } else if (n == 1) {
      paste0("1 ", vessel)
    } else {
      paste0("no more ", vessel_plural)
    }
  }

  # Upper-case only the first character, leaving the rest of the line (and so
  # any capitals in the caller's vessel/liquid/surface names) untouched.
  capitalise <- function(text) {
    paste0(toupper(substr(text, 1, 1)), substring(text, 2))
  }

  # print each lyric
  for (current_bottles in seq(total_bottles, 0)) {
    current <- paste0(count_phrase(current_bottles), " of ", liquid)
    # first line
    cat(capitalise(paste0(
      current, " on the ", surface, ", ", current, ".\n"
    )))
    # second line
    if (current_bottles > 0) {
      cat(paste0(
        "Take one down and pass it around, ",
        count_phrase(current_bottles - 1), " of ", liquid,
        " on the ", surface, ".\n"
      ))
    } else {
      cat(paste0(
        "Go to the store and buy some more, ",
        count_phrase(total_bottles), " of ", liquid,
        " on the ", surface, ".\n"
      ))
    }
    cat("\n")
  }
  invisible(NULL)
}
beer_bottles(3)
## 3 bottles of beer on the wall, 3 bottles of beer.
## Take one down and pass it around, 2 bottles of beer on the wall.
##
## 2 bottles of beer on the wall, 2 bottles of beer.
## Take one down and pass it around, 1 bottle of beer on the wall.
##
## 1 bottle of beer on the wall, 1 bottle of beer.
## Take one down and pass it around, no more bottles of beer on the wall.
##
## No more bottles of beer on the wall, no more bottles of beer.
## Go to the store and buy some more, 3 bottles of beer on the wall.
# When x has no names, names(x) is NULL, so the loop below has nothing to
# iterate over: its body never runs and nothing is printed.
x <- c(11, 12, 13)
print(names(x))
## NULL
for (nm in names(x)) {
print(nm)
print(x[[nm]])
}
# If only some elements are named, names(x) contains "" for the unnamed ones,
# and looking up x[[""]] fails with a "subscript out of bounds" error.
x <- c(a = 11, 12, c = 13)
names(x)
## [1] "a" "" "c"
for (nm in names(x)) {
print(nm)
print(x[[nm]])
}
## [1] "a"
## [1] 11
## [1] ""
## Error in x[[nm]]: subscript out of bounds
# If the vector contains duplicate names, x[[nm]] returns the first element
# with that name, so the second "a" prints 11 again rather than 12.
x <- c(a = 11, a = 12, c = 13)
names(x)
## [1] "a" "a" "c"
for (nm in names(x)) {
print(nm)
print(x[[nm]])
}
## [1] "a"
## [1] 11
## [1] "a"
## [1] 11
## [1] "c"
## [1] 13
# Print the mean of every numeric column of a data frame, one
# "<name>: <mean>" line per column, with the values lined up.
#
# Arguments:
#   df      A data frame.
#   digits  Passed to format() as both `digits` and `nsmall` (default 2).
#
# Non-numeric columns are skipped. Labels are padded to the longest column
# name in `df`, numeric or not. Called for its printed output; returns NULL
# invisibly. Only base R functions are used.
show_mean <- function(df, digits = 2) {
  col_names <- names(df)
  # With no columns there is nothing to print, and max() of an empty vector
  # would warn and return -Inf.
  if (length(col_names) == 0L) {
    return(invisible(NULL))
  }
  # Width of the longest "<name>:" label.
  label_width <- max(nchar(col_names)) + 1L
  for (nm in col_names) {
    if (is.numeric(df[[nm]])) {
      # flag = "-" left-justifies, i.e. pads the label on the right.
      label <- formatC(paste0(nm, ":"), width = label_width, flag = "-")
      value <- format(mean(df[[nm]]), digits = digits, nsmall = digits)
      # sep = "" so that no stray space is printed before the line break.
      cat(label, " ", value, "\n", sep = "")
    }
  }
  invisible(NULL)
}
show_mean(iris)
## Sepal.Length: 5.84
## Sepal.Width: 3.06
## Petal.Length: 3.76
## Petal.Width: 1.20
Write code that uses one of the map functions to: compute the mean of every column in mtcars; determine the type of each column in nycflights13::flights; compute the number of unique values in each column of iris; and generate 10 random normals for each of μ = −10, 0, 10, and 100.
# Mean of every column in mtcars: mean() gives one number per column, so
# map_dbl() collects the results into a named double vector.
mtcars %>%
  map_dbl(mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Type of every column in nycflights13::flights: typeof() (discussed in the
# section on Vector basics) gives one string per column, so map_chr() collects
# the results into a named character vector.
nycflights13::flights %>%
  map_chr(typeof)
## year month day dep_time sched_dep_time
## "integer" "integer" "integer" "integer" "integer"
## dep_delay arr_time sched_arr_time arr_delay carrier
## "double" "integer" "integer" "double" "character"
## flight tailnum origin dest air_time
## "integer" "character" "character" "character" "double"
## distance hour minute time_hour
## "double" "double" "double" "double"
# n_distinct() calculates the number of unique values in a vector. The count
# is an integer, so map_int() is the natural fit.
map_int(iris, n_distinct)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# map_dbl() also works and returns the same counts as doubles.
map_dbl(iris, n_distinct)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# The same counts without n_distinct(), using an anonymous function ...
map_int(iris, function(x) length(unique(x)))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# ... or, equivalently, purrr's formula shorthand.
map_int(iris, ~ length(unique(.x)))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
# Generate 10 random normals for each of the means -10, 0, 10 and 100.
# n is passed by name, so each mean supplied by map() lands in rnorm()'s next
# argument, `mean`.
map(c(-10, 0, 10, 100), rnorm, n = 10)
## [[1]]
## [1] -10.040509 -8.377949 -10.661548 -10.425124 -10.634487 -11.219784
## [7] -10.928110 -10.017908 -10.469457 -10.178721
##
## [[2]]
## [1] 1.8834881 -2.4043614 0.2893253 0.8607293 -0.3409145 -1.1159858
## [7] 1.5947398 0.9129872 0.1505654 -1.6111096
##
## [[3]]
## [1] 9.910664 10.192248 7.860912 12.691659 9.613689 10.435574 10.763063
## [8] 8.677833 9.136049 9.317961
##
## [[4]]
## [1] 100.56351 101.74481 98.31660 96.89202 100.01892 102.57980 100.04675
## [8] 100.41889 98.88398 101.32893
What does map_dbl(-2:2, rnorm, n = 5) do? Why?
# map_dbl() requires each call to return a single number, but
# rnorm(n = 5, mean = m) returns five, so map_dbl(-2:2, rnorm, n = 5) fails
# with an error. map() has no such restriction: it returns a list holding
# five draws for each of the means -2, -1, 0, 1 and 2.
map(-2:2, rnorm, n = 5)
## [[1]]
## [1] -2.8280861 -0.2764502 -2.2690586 -3.1893446 -1.0048463
##
## [[2]]
## [1] -0.08235484 -2.14569398 -2.79742729 -1.29668012 1.47352741
##
## [[3]]
## [1] -0.82558960 0.26163935 -0.92619358 -0.03439889 -1.50698776
##
## [[4]]
## [1] 0.9743125 0.7518809 1.1481643 0.9519482 2.6241986
##
## [[5]]
## [1] 1.0090739 1.7840416 0.4703622 1.0096348 0.7561794
# To get one numeric vector instead of a list, flatten the list that map()
# returns.
flatten_dbl(map(-2:2, rnorm, n = 5))
## [1] -0.6174964 -1.2030087 -2.1791177 -2.8232231 -1.8997098 -1.1687931
## [7] -2.5292588 -0.3670285 -1.5292841 -0.2286154 0.3447048 -0.1286533
## [13] 0.2121289 1.0923856 0.2097538 2.5108405 0.2947574 0.8935331
## [19] 0.7898775 2.4039278 1.0823736 1.5407692 0.3419537 0.6421964
## [25] 1.1420430