Visualizing coefficients and CI using tidyverse

Tidyverse

purrr::map(.x, .f, ...)

The map functions apply a function to each element of a list or atomic vector and return an object of the same length as the input.

map() always returns a list.

library(tidyverse)

# For each x in 1:5 this calls rnorm(x, n = 10). Because n is supplied by
# name, x fills the next free argument of rnorm(), which is mean. The result
# is a list of five vectors, each holding 10 draws with mean 1, 2, ..., 5.
1:5 %>%
  map(rnorm, n = 10) # one list element per input value

## [[1]]
##  [1]  1.67451127  1.73972610  1.85960782  0.61710598 -1.11172289  1.49131262
##  [7]  0.86700605  0.47828063 -0.13685203  0.01751604
## 
## [[2]]
##  [1] 2.9916353 1.9250806 2.1567099 2.5241246 1.0120961 2.6723280 2.8030650
##  [8] 0.5054575 1.6979228 2.3625194
## 
## [[3]]
##  [1] 2.681651 4.141192 3.114176 3.892186 1.669797 3.741292 1.416241 3.492351
##  [9] 2.067958 2.507642
## 
## [[4]]
##  [1] 4.783708 2.948677 3.952033 4.644135 4.662124 4.531743 4.410213 3.962545
##  [9] 4.429679 3.428949
## 
## [[5]]
##  [1] 5.396175 3.960089 5.842423 6.323959 4.797819 4.782779 5.777445 3.586000
##  [9] 4.669761 4.629637

# Base-R counterpart of the map() call: sapply() simplifies the five
# length-10 results into a 10 x 5 matrix, one column per mean.
sapply(1:5, rnorm, n=10)

##             [,1]        [,2]     [,3]       [,4]     [,5]
##  [1,] -0.9139615  1.84696976 2.532211  4.3085126 4.777586
##  [2,]  1.5304991  1.76882509 2.881194  3.7915658 5.380015
##  [3,]  2.2224088  0.88341067 2.500327  4.6315233 5.703314
##  [4,]  1.0249217 -0.07071838 2.527784  4.0739667 5.965322
##  [5,]  3.0962781  2.78720703 2.149767  4.6639623 5.217612
##  [6,]  1.8870596  1.27354961 1.705146 -0.2530315 4.518259
##  [7,]  0.1714201  1.24795050 1.782736  4.1936623 5.925042
##  [8,]  1.9774182  2.90032161 3.289727  2.4942368 7.034598
##  [9,]  0.9971900  1.67661526 3.565695  3.7761223 4.870095
## [10,]  2.4413275  0.60327948 2.962241  3.1553684 5.639992

tidyr::nest()
The nest() function in tidyr packs the rows of a data frame into smaller data frames and stores them as a list-column within a data frame. This can be extremely handy for any downstream analysis.

Usage: nest(<new column> = <column(s) to nest>), e.g. nest(data = c(y, z)). nest(data = everything()) nests every column, which gives a 1 x 1 tibble.

Compare with group_by(): nesting count leaves one row per name, i.e. the same groups that group_by(name) defines.

# Toy data: x is the key that stays outside; y and z are the columns to nest.
df <- tibble(
  x = c(1, 1, 1, 2, 2, 3),
  y = 1:6,
  z = 6:1
)

# Pack y and z into a list-column named "data", one row per value of x.
outcome <- nest(df, data = c(y, z))
outcome

## # A tibble: 3 x 2
##       x data            
##   <dbl> <list>          
## 1     1 <tibble [3 x 2]>
## 2     2 <tibble [2 x 2]>
## 3     3 <tibble [1 x 2]>

# Make a simple data frame with two columns.
df <- data.frame(name=c("a","b","b","c","c","c"), count=c(1,2,3,1,2,3))
# Nest the "count" column within the data frame; call the nested column "dat".
df %>% nest(dat=count) -> outcome2

# map() iterates over the columns of outcome2, and the integer 3 extracts the
# third element of each one: the third name and the third nested tibble.
map(outcome2, 3) # third element of every column, not only of dat

## $name
## [1] "c"
## 
## $dat
## # A tibble: 3 x 1
##   count
##   <dbl>
## 1     1
## 2     2
## 3     3

# Reverse the nesting: expand the list-column dat back into ordinary rows.
unnest(outcome2, cols=c(dat))

## # A tibble: 6 x 2
##   name  count
##   <chr> <dbl>
## 1 a         1
## 2 b         2
## 3 b         3
## 4 c         1
## 5 c         2
## 6 c         3

# Grouped summary on the flat df: mean of count within each name. The summary
# is left unnamed, so the output column is labelled with the expression.
df %>% group_by(name) %>% summarise(mean(count))

## # A tibble: 3 x 2
##   name  `mean(count)`
##   <chr>         <dbl>
## 1 a               1  
## 2 b               2.5
## 3 c               2

pivot_longer()
“lengthens” data, increasing the number of rows and decreasing the number of columns. The inverse transformation is pivot_wider()

# Stack every column of mtcars into two columns, using the default output
# names "name" and "value".
pivot_longer(mtcars, cols = everything())

## # A tibble: 352 x 2
##    name   value
##    <chr>  <dbl>
##  1 mpg    21   
##  2 cyl     6   
##  3 disp  160   
##  4 hp    110   
##  5 drat    3.9 
##  6 wt      2.62
##  7 qsec   16.5 
##  8 vs      0   
##  9 am      1   
## 10 gear    4   
## # ... with 342 more rows

# Same reshape, with custom names for the two output columns.
# values_drop_na = FALSE (also the default) keeps rows whose value is NA.
pivot_longer(mtcars, cols = everything(), 
             names_to = "new name", values_to="new variable", 
             values_drop_na = FALSE)

## # A tibble: 352 x 2
##    `new name` `new variable`
##    <chr>               <dbl>
##  1 mpg                 21   
##  2 cyl                  6   
##  3 disp               160   
##  4 hp                 110   
##  5 drat                 3.9 
##  6 wt                   2.62
##  7 qsec                16.5 
##  8 vs                   0   
##  9 am                   1   
## 10 gear                 4   
## # ... with 342 more rows

Coefficient plot

# Load the birthwt data set from MASS without attaching the whole package.
data(birthwt, package="MASS")
library(tidyverse)
library(ggplot2) # harmless repeat: tidyverse already attaches ggplot2

birthwt <- as_tibble(birthwt) # convert to a nicer data frame.

## Fit the models and collect their coefficient tables in one tibble.
result <- birthwt %>%
  ## Nest the whole data frame into a single list-column: a 1 x 1 tibble.
  nest(dat = everything()) %>%

  ## Add two list-columns, each holding one fitted linear regression.
  ## Inside the lambda, .x is the nested data frame, i.e. birthwt itself.
  mutate(
    Model1 = map(dat, ~ lm(bwt ~ smoke, data = .x)),
    Model2 = map(dat, ~ lm(bwt ~ smoke + age, data = .x))
  ) %>%

  ## Drop the nested data; only the fitted models are needed from here on.
  select(-dat) %>%

  ## Pivot Model1 and Model2 into rows, one model per row:
  ## A tibble: 2 x 2
  ##   Model  Result
  ##   <chr>  <list>
  ## 1 Model1 <lm>
  ## 2 Model2 <lm>
  pivot_longer(everything(), names_to = "Model", values_to = "Result") %>%

  ## Replace every fitted model with its coefficient table. broom::tidy()
  ## returns term, estimate, std.error, statistic and p.value (5 columns);
  ## conf.int = TRUE appends conf.low and conf.high, for 7 columns in total:
  ## A tibble: 2 x 2
  ##   Model  Result
  ##   <chr>  <list>
  ## 1 Model1 <tibble [2 x 7]>
  ## 2 Model2 <tibble [3 x 7]>
  mutate(Result = map(Result, broom::tidy, conf.int = TRUE)) %>%

  ## Result is a list-column of two tibbles. Unnesting replaces it with their
  ## 7 columns and stacks their rows (2 + 3 = 5), giving a 5 x 8 tibble.
  unnest(cols = Result)

result

## # A tibble: 5 x 8
##   Model  term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>  <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 Model1 (Intercept)   3056.      66.9      45.7  2.46e-103  2924.      3188. 
## 2 Model1 smoke         -284.     107.       -2.65 8.67e-  3  -495.       -72.8
## 3 Model2 (Intercept)   2791.     241.       11.6  1.03e- 23  2316.      3267. 
## 4 Model2 smoke         -278.     107.       -2.60 1.00e-  2  -489.       -67.3
## 5 Model2 age             11.3      9.88      1.14 2.55e-  1    -8.20      30.8

## Keep only the columns needed downstream, then only the rows for the
## smoke coefficient; the trimmed table replaces result.
result <- filter(
  select(result, Model, term, estimate, conf.high, conf.low),
  term == "smoke"
)

## Render the trimmed table as a formatted table.
knitr::kable(result)

Model	term	estimate	conf.high	conf.low
Model1	smoke	-283.7767	-72.75612	-494.7973
Model2	smoke	-278.3561	-67.29256	-489.4197

Plot

# Reverse the order of Model1 and Model2: coord_flip() draws the first factor
# level at the bottom, so listing Model2 first puts Model1 on top.
result$Model <- factor(result$Model, levels=c("Model2", "Model1"))

# One point estimate per model with its confidence interval; the dashed
# reference line marks a coefficient of zero.
p <- ggplot(result, aes(x = Model, y = estimate,
                     color = Model, shape = Model))+
  coord_flip() +
  geom_hline(yintercept=0, linetype="dashed") +
  geom_point(size = 3) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  ylab("Coefficient Estimates and CI") # y aesthetic; horizontal after the flip

p

Visualizing coefficients and CI using tidyverse

10/21/2020

Tidyverse

Coefficient plot