Old routine

newdata4 <- with(mydata, data.frame(gre = mean(gre), 
                                    gpa = rep(seq(from = 2.2, to = 4.0, length.out = 10), 4),
                                    rank = factor(rep(1:4, each = 10))))
newdata5 <- cbind(newdata4, predict(mylogit, newdata = newdata4, type = "response", se = T))
head(newdata5)

##     gre gpa rank       fit     se.fit residual.scale
## 1 587.7 2.2    1 0.2910493 0.09991960              1
## 2 587.7 2.4    1 0.3253075 0.09443782              1
## 3 587.7 2.6    1 0.3615418 0.08777126              1
## 4 587.7 2.8    1 0.3994227 0.08058163              1
## 5 587.7 3.0    1 0.4385464 0.07382864              1
## 6 587.7 3.2    1 0.4784494 0.06869247              1

newdata5 <- within(newdata5, {
  LL <- fit - (1.96 * se.fit)
  UL <- fit + (1.96 * se.fit)
})
library(ggplot2)
ggplot(newdata5, aes(x = gpa, y = fit)) +
  geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) +
  geom_line(aes(colour = rank), size = 0.75) +
  labs(y = "Predicted probability of being admitted") +
  ylim(0, 1)

Tidy routine

Use crossing for getting a tibble of all the combinations of given values

library(tidyr)
new_data <- crossing(gre = mean(mydata$gre),
                     gpa = seq(from = 2.2, to = 4.0, length.out = 10),
                     rank = factor(1:4))
new_data

## # A tibble: 40 x 3
##      gre   gpa rank 
##    <dbl> <dbl> <fct>
##  1  588.   2.2 1    
##  2  588.   2.2 2    
##  3  588.   2.2 3    
##  4  588.   2.2 4    
##  5  588.   2.4 1    
##  6  588.   2.4 2    
##  7  588.   2.4 3    
##  8  588.   2.4 4    
##  9  588.   2.6 1    
## 10  588.   2.6 2    
## # … with 30 more rows

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ readr   1.3.1     ✔ stringr 1.4.0
## ✔ purrr   0.3.3     ✔ forcats 0.4.0

## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(broom)
mylogit %>% 
  augment(newdata = new_data, type.predict = "response")

## # A tibble: 40 x 5
##      gre   gpa rank  .fitted .se.fit
##    <dbl> <dbl> <fct>   <dbl>   <dbl>
##  1  588.   2.2 1      0.291   0.0999
##  2  588.   2.2 2      0.173   0.0613
##  3  588.   2.2 3      0.0970  0.0426
##  4  588.   2.2 4      0.0800  0.0375
##  5  588.   2.4 1      0.325   0.0944
##  6  588.   2.4 2      0.197   0.0584
##  7  588.   2.4 3      0.112   0.0426
##  8  588.   2.4 4      0.0927  0.0387
##  9  588.   2.6 1      0.362   0.0878
## 10  588.   2.6 2      0.224   0.0542
## # … with 30 more rows

mylogit %>% 
  augment(newdata = new_data, type.predict = "response") %>% 
  mutate(LL = (.fitted - (1.96 * .se.fit)),
         UL = (.fitted + (1.96 * .se.fit))) %>%  
  ggplot(aes(gpa, .fitted, fill = rank)) +
  geom_line() +
  labs(y = "Predicted probability of being admitted") +
  xlim(2.2,4) +
  geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) +
  geom_line(aes(colour = rank), size = 0.75) +
  ylim(0, 1)

Source: David Robinson, DCR 12/02/2019 https://www.youtube.com/watch?v=NDHSBUN_rVU

Compare old and new pred routine

Anna S

10 03 2020

Old routine

Tidy routine