purrr::map(.x, .f, ...)
The map functions apply a function to each element of a list and return an object of the same length as the input.
map() always returns a list.
library(tidyverse)
# map() passes each element of 1:5 to rnorm(); n is fixed by name, so the
# element is matched to `mean`: 10 draws for each of the means 1, 2, ..., 5.
1:5 %>%
  map(~ rnorm(n = 10, mean = .x))
## [[1]]
## [1] 1.67451127 1.73972610 1.85960782 0.61710598 -1.11172289 1.49131262
## [7] 0.86700605 0.47828063 -0.13685203 0.01751604
##
## [[2]]
## [1] 2.9916353 1.9250806 2.1567099 2.5241246 1.0120961 2.6723280 2.8030650
## [8] 0.5054575 1.6979228 2.3625194
##
## [[3]]
## [1] 2.681651 4.141192 3.114176 3.892186 1.669797 3.741292 1.416241 3.492351
## [9] 2.067958 2.507642
##
## [[4]]
## [1] 4.783708 2.948677 3.952033 4.644135 4.662124 4.531743 4.410213 3.962545
## [9] 4.429679 3.428949
##
## [[5]]
## [1] 5.396175 3.960089 5.842423 6.323959 4.797819 4.782779 5.777445 3.586000
## [9] 4.669761 4.629637
# Same draws as the map() call, but sapply() simplifies the five length-10
# vectors into a 10 x 5 matrix (one column per mean).
sapply(1:5, function(mu) rnorm(n = 10, mean = mu))
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.9139615 1.84696976 2.532211 4.3085126 4.777586
## [2,] 1.5304991 1.76882509 2.881194 3.7915658 5.380015
## [3,] 2.2224088 0.88341067 2.500327 4.6315233 5.703314
## [4,] 1.0249217 -0.07071838 2.527784 4.0739667 5.965322
## [5,] 3.0962781 2.78720703 2.149767 4.6639623 5.217612
## [6,] 1.8870596 1.27354961 1.705146 -0.2530315 4.518259
## [7,] 0.1714201 1.24795050 1.782736 4.1936623 5.925042
## [8,] 1.9774182 2.90032161 3.289727 2.4942368 7.034598
## [9,] 0.9971900 1.67661526 3.565695 3.7761223 4.870095
## [10,] 2.4413275 0.60327948 2.962241 3.1553684 5.639992
tidyr::nest()
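The nest() function in tidyr splits a data frame into smaller data frames and stores them as a list-column inside a data frame. This can be extremely handy for any downstream analysis.
Usage: nest(data = <columns>) nests the listed column(s) into a list-column named data. nest(data = everything()) nests every column, giving a 1 x 1 tibble.
Compare with group_by(): nesting count leaves one row per name, i.e. the same groups that group_by(name) would produce.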
df <- tibble(
  x = c(1, 1, 1, 2, 2, 3),
  y = 1:6,
  z = 6:1
)
# Pack y and z into a list-column called `data`; x is left as a plain column.
outcome <- nest(df, data = c(y, z))
outcome
## # A tibble: 3 x 2
## x data
## <dbl> <list>
## 1 1 <tibble [3 x 2]>
## 2 2 <tibble [2 x 2]>
## 3 3 <tibble [1 x 2]>
# Build a small two-column data frame.
df <- data.frame(
  name = c("a", "b", "b", "c", "c", "c"),
  count = c(1, 2, 3, 1, 2, 3)
)
# Nest the "count" column into a list-column called "dat".
outcome2 <- nest(df, dat = count)
# Extract the third element of every column of outcome2.
map(outcome2, 3)
## $name
## [1] "c"
##
## $dat
## # A tibble: 3 x 1
## count
## <dbl>
## 1 1
## 2 2
## 3 3
# Undo the nesting: expand the list-column `dat` back into ordinary rows.
outcome2 %>%
  unnest(cols = dat)
## # A tibble: 6 x 2
## name count
## <chr> <dbl>
## 1 a 1
## 2 b 2
## 3 b 3
## 4 c 1
## 5 c 2
## 6 c 3
# Mean of count within each name; the unnamed summary keeps the expression
# text as its column name.
df %>%
  group_by(name) %>%
  summarise(mean(count))
## # A tibble: 3 x 2
## name `mean(count)`
## <chr> <dbl>
## 1 a 1
## 2 b 2.5
## 3 c 2
pivot_longer()
“lengthens” data, increasing the number of rows and decreasing the number of columns. The inverse transformation is pivot_wider()
# Stack every column of mtcars into name/value pairs (the default column names).
mtcars %>%
  pivot_longer(cols = everything())
## # A tibble: 352 x 2
## name value
## <chr> <dbl>
## 1 mpg 21
## 2 cyl 6
## 3 disp 160
## 4 hp 110
## 5 drat 3.9
## 6 wt 2.62
## 7 qsec 16.5
## 8 vs 0
## 9 am 1
## 10 gear 4
## # ... with 342 more rows
# Same reshape, but with custom names for the two output columns.
mtcars %>%
  pivot_longer(
    cols = everything(),
    names_to = "new name",
    values_to = "new variable",
    values_drop_na = FALSE
  )
## # A tibble: 352 x 2
## `new name` `new variable`
## <chr> <dbl>
## 1 mpg 21
## 2 cyl 6
## 3 disp 160
## 4 hp 110
## 5 drat 3.9
## 6 wt 2.62
## 7 qsec 16.5
## 8 vs 0
## 9 am 1
## 10 gear 4
## # ... with 342 more rows
data(birthwt, package = "MASS")
library(tidyverse)
library(ggplot2)
birthwt <- as_tibble(birthwt) # convert the data frame to a tibble
## Fit two linear models and gather their tidied coefficients in one tibble.
result <- birthwt %>%
  ## Pack the whole data set into one list-column `dat`: a 1 x 1 tibble.
  nest(dat = everything()) %>%
  ## Add one list-column per model. Inside each formula lambda, `.` is the
  ## element of `dat`, i.e. the full birthwt tibble as it was before nesting.
  mutate(
    Model1 = map(dat, ~ lm(bwt ~ smoke, data = .)),
    Model2 = map(dat, ~ lm(bwt ~ smoke + age, data = .))
  ) %>%
  ## The nested data is no longer needed.
  select(-dat) %>%
  ## Turn one column per model into one row per model:
  ##   Model  Result
  ##   <chr>  <list>
  ##   Model1 <lm>
  ##   Model2 <lm>
  pivot_longer(everything(), names_to = "Model", values_to = "Result") %>%
  ## Replace each fitted model with its broom::tidy() summary. tidy() reports
  ## term, estimate, std.error, statistic and p.value; conf.int = TRUE adds
  ## conf.low and conf.high, so each summary has 7 columns:
  ##   Model  Result
  ##   <chr>  <list>
  ##   Model1 <tibble [2 x 7]>
  ##   Model2 <tibble [3 x 7]>
  mutate(Result = map(Result, broom::tidy, conf.int = TRUE)) %>%
  ## Flatten the list of summaries: Model plus the 7 tidy() columns,
  ## one row per coefficient.
  unnest(cols = Result)
result
## # A tibble: 5 x 8
## Model term estimate std.error statistic p.value conf.low conf.high
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Model1 (Intercept) 3056. 66.9 45.7 2.46e-103 2924. 3188.
## 2 Model1 smoke -284. 107. -2.65 8.67e- 3 -495. -72.8
## 3 Model2 (Intercept) 2791. 241. 11.6 1.03e- 23 2316. 3267.
## 4 Model2 smoke -278. 107. -2.60 1.00e- 2 -489. -67.3
## 5 Model2 age 11.3 9.88 1.14 2.55e- 1 -8.20 30.8
## Keep only the smoke coefficient of each model, then the columns needed
## for reporting and plotting.
result <- result %>%
  filter(term == "smoke") %>%
  select(Model, term, estimate, conf.high, conf.low)
knitr::kable(result) # print the result as a formatted table
| Model | term | estimate | conf.high | conf.low |
|---|---|---|---|---|
| Model1 | smoke | -283.7767 | -72.75612 | -494.7973 |
| Model2 | smoke | -278.3561 | -67.29256 | -489.4197 |
Plot
# Reorder the levels (Model2 first) because coord_flip() will flip the axes.
result <- result %>%
  mutate(Model = factor(Model, levels = c("Model2", "Model1")))
# Point estimate with its confidence interval for each model.
p <- result %>%
  ggplot(aes(x = Model, y = estimate, color = Model, shape = Model)) +
  geom_hline(yintercept = 0, linetype = "dashed") + # dashed line at zero
  geom_point(size = 3) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  coord_flip() +
  labs(y = "Coefficient Estimates and CI")
p