# Code is a tool of communication.
library(magrittr)
library(dplyr)
library(pryr)
library(ggplot2)
# Bind the `diamonds` dataset (available once ggplot2 is attached) to a name
# in the workspace, then derive a second data frame that adds one computed
# column, `price_per_carat`.
diamonds <- diamonds
diamonds2 <- diamonds %>%
mutate(price_per_carat = price / carat)
# Compare memory footprints with object_size(). The bare lines after each
# call are the console output recorded when this was run.
object_size(diamonds)
3.46 MB
object_size(diamonds2)
3.89 MB
# Measured together, the pair is reported at the same size as `diamonds2`
# alone, which indicates the columns common to both objects are stored once
# rather than duplicated.
object_size(diamonds,diamonds2)
3.89 MB
# Other pipe tools from the magrittr package: `%T>%`, `%$%`, and `%<>%`.
# Pipe 100 random normal draws into a two-column matrix, plot it, and then
# pass whatever plot() returned on to str(). The recorded output below is
# NULL: plot() is called for its side effect, so nothing useful reaches str().
rnorm(100) %>%
matrix(ncol = 2) %>%
plot() %>%
str()
NULL
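# The "tee" pipe, `%T>%`, works like `%>%` but returns the left-hand side instead of the right-hand side's result.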
# Same pipeline as before, but the matrix is sent to plot() with `%T>%`.
# The matrix itself (not plot()'s return value) continues down the pipe, so
# str() describes the 50 x 2 numeric matrix, as the recorded output shows.
rnorm(100) %>%
matrix(ncol = 2) %T>%
plot() %>%
str()
num [1:50, 1:2] 0.433 1.173 0.386 0.25 0.568 ...
# The exposition pipe, `%$%`, "explodes" out the variables in a data frame so they can be referred to by name.
# `%$%` exposes the columns of `mtcars` to the right-hand side, so `disp` and
# `mpg` can be passed to cor() by bare name. The line after the call is the
# recorded console output.
mtcars %$%
cor(disp, mpg)
[1] -0.8475514
# `%<>%` pipes `mtcars` into transform() and assigns the result back to
# `mtcars`, so `cyl` is doubled in place. Anything later in the session that
# uses `mtcars$cyl` sees the doubled values.
mtcars %<>% transform(cyl = cyl * 2)
# Build a small tibble with four columns, each holding 10 standard-normal
# draws.
df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

# Rescale every column to [0, 1]: subtract the column minimum, then divide by
# the column range (max - min). Each statement must refer only to its own
# column; using another column's minimum in the denominator would leave the
# result outside [0, 1].
df$a <- (df$a - min(df$a, na.rm = TRUE)) /
  (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
  (max(df$b, na.rm = TRUE) - min(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
  (max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
  (max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
# Linearly rescale a numeric vector so its smallest value maps to 0 and its
# largest to 1. Missing values are ignored when locating the extremes and
# stay missing in the result. Infinite values are NOT excluded when finding
# the extremes.
rescale01 <- function(x) {
  lo <- min(x, na.rm = TRUE)
  hi <- max(x, na.rm = TRUE)
  (x - lo) / (hi - lo)
}
# Sanity check on a simple input; the line after the call is the recorded
# console output.
rescale01(c(0, 5, 10))
[1] 0.0 0.5 1.0
# With an infinite value in the input the range itself becomes infinite:
# every finite element collapses to 0 and the infinite one becomes NaN, as
# the recorded output shows.
x <- c(1:10, Inf)
rescale01(x)
[1] 0 0 0 0 0 0 0 0 0 0 NaN
# Linearly rescale a numeric vector to [0, 1] using only its finite values
# to determine the bounds. Missing values stay missing; infinite values are
# passed through the same formula and therefore remain infinite.
rescale01 <- function(x) {
  bounds <- range(x, na.rm = TRUE, finite = TRUE)
  span <- diff(bounds)
  (x - bounds[1]) / span
}
# Re-run on the vector containing Inf: with the finite-only range the ten
# finite values now spread evenly over [0, 1] and the infinite element stays
# Inf. The lines after the call are the recorded console output.
rescale01(x)
[1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667 0.7777778 0.8888889
[10] 1.0000000 Inf
# Confidence interval for the mean of `x` under a normal approximation.
#
# x:    numeric vector of observations.
# conf: confidence level of the interval (default 0.95).
#
# Returns a length-2 numeric vector: lower bound, then upper bound.
mean_ci <- function(x, conf = 0.95) {
  # Probability mass left in each tail outside the interval.
  tail_prob <- (1 - conf) / 2
  std_err <- sd(x) / sqrt(length(x))
  mean(x) + std_err * qnorm(c(tail_prob, 1 - tail_prob))
}
# Try mean_ci() on 100 uniform random draws. `x` is random, so the numbers
# change from run to run. The bare `[1]` lines are the output recorded in
# this session; the `#>` lines show output from a different run (presumably
# copied from reference material -- confirm).
x <- runif(100)
mean_ci(x)
[1] 0.4650151 0.5799727
#> [1] 0.498 0.610
# A higher confidence level gives a wider interval.
mean_ci(x, conf = 0.99)
[1] 0.4469539 0.5980339
#> [1] 0.480 0.628
# Weighted mean of `x` with weights `w`: the weighted total divided by the
# total weight. No length check is made, so a shorter `w` is recycled by the
# usual vector arithmetic rules.
wt_mean <- function(x, w) {
  weighted_total <- sum(x * w)
  total_weight <- sum(w)
  weighted_total / total_weight
}
# Weighted variance of `x` with weights `w`: the weighted average of squared
# deviations from the weighted mean (as computed by wt_mean()).
wt_var <- function(x, w) {
  centre <- wt_mean(x, w)
  sq_dev <- (x - centre)^2
  sum(w * sq_dev) / sum(w)
}
# Weighted standard deviation of `x` with weights `w`: the square root of
# the weighted variance returned by wt_var().
wt_sd <- function(x, w) {
  variance <- wt_var(x, w)
  sqrt(variance)
}
# `x` has six elements but `w` only three. No error is raised: `w` is
# recycled to match `x`, and a number is returned anyway (recorded output on
# the next line).
wt_mean(1:6, 1:3)
[1] 7.666667
# Weighted mean of `x` with weights `w`.
#
# Stops with an error when `x` and `w` differ in length, so weights are
# never silently recycled. Otherwise returns sum(w * x) / sum(w).
wt_mean <- function(x, w) {
  same_length <- length(x) == length(w)
  if (!same_length) {
    stop("`x` and `w` must be the same length", call. = FALSE)
  }
  weighted_total <- sum(w * x)
  weighted_total / sum(w)
}
library(tidyverse)
# Explore the basic vector types. The line(s) following each call are the
# console output recorded when this was run.
typeof(letters)
[1] "character"
typeof(1:10)
[1] "integer"
# A comparison yields a logical vector, one element per input element.
1:10 %% 3 == 0
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
# A bare number is a double; the `L` suffix makes it an integer.
typeof(1)
[1] "double"
typeof(1L)
[1] "integer"
# The scalars 100 and 0.5 are recycled across the length-10 vectors. Both
# inputs are random, so the output differs from run to run.
sample(10) + 100
[1] 106 103 109 105 108 101 107 110 102 104
runif(10) > 0.5
[1] TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE FALSE
# Two ways of stretching 1:2 to length 4 with rep(): repeating the whole
# vector (1, 2, 1, 2) versus repeating each element (1, 1, 2, 2).
tibble(x = 1:4, y = rep(1:2, 2))
tibble(x = 1:4, y = rep(1:2, each = 2))
# A named list; str() prints its structure compactly.
x_named <- list(a = 1, b = 2, c = 3)
str(x_named)
List of 3
$ a: num 1
$ b: num 2
$ c: num 3
# A list of two numeric vectors.
x1 <- list(
  c(1, 2),
  c(3, 4)
)

# A list of two lists, each holding two scalars.
x2 <- list(
  list(1, 2),
  list(3, 4)
)

# A scalar followed by progressively deeper nesting.
x3 <- list(
  1,
  list(
    2,
    list(3)
  )
)

# A named list mixing element types: integer vector, string, double, list.
a <- list(
  a = 1:3,
  b = "a string",
  c = pi,
  d = list(-1, -5)
)
# Simulate one toss of a fair coin; returns "T" or "H".
flip <- function() {
  sides <- c("T", "H")
  sample(sides, 1)
}

# Keep tossing until three heads appear in a row, counting every toss.
flips <- 0
nheads <- 0
repeat {
  if (nheads >= 3) {
    break
  }
  # A head extends the current streak; a tail resets it to zero.
  nheads <- if (flip() == "H") nheads + 1 else 0
  flips <- flips + 1
}
flips
[1] 17
# A tibble with four columns of 10 standard-normal draws each. The values
# are random, so the summaries below differ from run to run; the lines after
# each call are the console output recorded for this run.
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
# map_dbl() applies the given function to every column and returns a named
# double vector, one element per column.
map_dbl(df, mean)
a b c d
-0.3449303 -0.4874738 0.3718107 0.2297544
map_dbl(df, median)
a b c d
-0.3854993 -0.4215123 0.4789902 0.3901601
map_dbl(df, sd)
a b c d
0.4261496 1.1121509 0.5676953 1.2017317
# The same three summaries written with the pipe; the recorded output is
# identical to the direct calls above.
df %>% map_dbl(mean)
a b c d
-0.3449303 -0.4874738 0.3718107 0.2297544
df %>% map_dbl(median)
a b c d
-0.3854993 -0.4215123 0.4789902 0.3901601
df %>% map_dbl(sd)
a b c d
0.4261496 1.1121509 0.5676953 1.2017317
# Split `mtcars` into one data frame per distinct `cyl` value and fit the
# linear model mpg ~ wt to each piece, using an explicit anonymous function.
models <- mtcars %>%
split(.$cyl) %>%
map(function(df) lm(mpg ~ wt, data = df))
# The same fit written with purrr's one-sided formula shorthand, where `.`
# stands for the current list element. This overwrites `models`.
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg ~ wt, data = .))
# Summarise every model, then extract each summary's R-squared as a named
# double vector. The names in the recorded output are 8, 12 and 16 because
# `cyl` was doubled earlier in this session by
# `mtcars %<>% transform(cyl = cyl * 2)`.
models %>%
map(summary) %>%
map_dbl(~.$r.squared)
8 12 16
0.5086326 0.4645102 0.4229655