Hadley uses the gapminder dataset to demonstrate the value of dataframes of nested dataframes/lists for comparing models. Each functions can be run on each dataframe using the map() function from the purrr package (basically Hadley’s implementation of lapply()).

As usual the talk is great. Check it out at YouTube.

by_country <- gapminder %>%
  mutate(year1950 = year - 1950 ) %>%  # dataset starts at 1950
  group_by(continent, country) %>%     
  nest()                  # all columns is placed in a dataframe 
                          # which is placed into a column "data"

country_model <- function(df){
  lm(lifeExp ~ year1950, data = df)
}

models <- by_country %>% 
  mutate(model = map(data, country_model)) %>%
  # model results are summarised in tidy dataframes using broom
  mutate(glance = map(model, broom::glance),
         rsq    = glance %>% map_dbl("r.squared"),
         tidy   = map(model, broom::tidy),
         augment= map(model, broom::augment))
models
## # A tibble: 142 × 8
##    continent     country              data    model                glance
##       <fctr>      <fctr>            <list>   <list>                <list>
## 1       Asia Afghanistan <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 2     Europe     Albania <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 3     Africa     Algeria <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 4     Africa      Angola <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 5   Americas   Argentina <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 6    Oceania   Australia <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 7     Europe     Austria <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 8       Asia     Bahrain <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 9       Asia  Bangladesh <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## 10    Europe     Belgium <tibble [12 × 5]> <S3: lm> <data.frame [1 × 11]>
## # ... with 132 more rows, and 3 more variables: rsq <dbl>, tidy <list>,
## #   augment <list>
ggplot(models, aes(x = rsq, y= reorder(country, rsq))) +
  geom_point(aes(color = continent)) +
  labs(y = "country") +
  theme(axis.text.y = element_blank(),
        axis.ticks.y= element_blank())

you can get the data out of each of the result dataframes using the unnest() function.

models %>% unnest(glance)
## # A tibble: 142 × 18
##    continent     country              data    model       rsq
##       <fctr>      <fctr>            <list>   <list>     <dbl>
## 1       Asia Afghanistan <tibble [12 × 5]> <S3: lm> 0.9477123
## 2     Europe     Albania <tibble [12 × 5]> <S3: lm> 0.9105778
## 3     Africa     Algeria <tibble [12 × 5]> <S3: lm> 0.9851172
## 4     Africa      Angola <tibble [12 × 5]> <S3: lm> 0.8878146
## 5   Americas   Argentina <tibble [12 × 5]> <S3: lm> 0.9955681
## 6    Oceania   Australia <tibble [12 × 5]> <S3: lm> 0.9796477
## 7     Europe     Austria <tibble [12 × 5]> <S3: lm> 0.9921340
## 8       Asia     Bahrain <tibble [12 × 5]> <S3: lm> 0.9667398
## 9       Asia  Bangladesh <tibble [12 × 5]> <S3: lm> 0.9893609
## 10    Europe     Belgium <tibble [12 × 5]> <S3: lm> 0.9945406
## # ... with 132 more rows, and 13 more variables: tidy <list>,
## #   augment <list>, r.squared <dbl>, adj.r.squared <dbl>, sigma <dbl>,
## #   statistic <dbl>, p.value <dbl>, df <int>, logLik <dbl>, AIC <dbl>,
## #   BIC <dbl>, deviance <dbl>, df.residual <int>
models %>% 
  unnest(tidy) %>% 
  select(continent, country, term, estimate, rsq) %>% 
  spread(term, estimate) %>% 
  ggplot(aes(`(Intercept)`, year1950)) +  # Life Expectancy in 1950 vs. slope ("yearlt increase")
    geom_point(aes(color= continent, size = rsq))+
    geom_smooth(se= FALSE) + 
    xlab("Life expectancy (1950)") + 
    ylab("Yearly Improvement") +
    scale_size_area()
## `geom_smooth()` using method = 'loess'

Residual plot of resduals from all 100+ models

models %>% 
  unnest(augment) %>% 
  ggplot(aes(year1950, .resid)) +
    geom_line(aes(group= country), alpha = 1/3) +
    geom_hline(yintercept = 0, color= "white", size= 2) +
    geom_smooth(se= FALSE) +
  facet_wrap(~ continent)
## `geom_smooth()` using method = 'loess'