mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
head(mydata)
## admit gre gpa rank
## 1 0 380 3.61 3
## 2 1 660 3.67 3
## 3 1 800 4.00 1
## 4 1 640 3.19 4
## 5 0 520 2.93 4
## 6 1 760 3.00 2
mydata$rank <- factor(mydata$rank)
mylogit <- glm(admit ~ gre + gpa + rank, data = mydata, family = "binomial")
newdata4 <- with(mydata, data.frame(gre = mean(gre),
gpa = rep(seq(from = 2.2, to = 4.0, length.out = 10), 4),
rank = factor(rep(1:4, each = 10))))
newdata5 <- cbind(newdata4, predict(mylogit, newdata = newdata4, type = "response", se = T))
head(newdata5)
## gre gpa rank fit se.fit residual.scale
## 1 587.7 2.2 1 0.2910493 0.09991960 1
## 2 587.7 2.4 1 0.3253075 0.09443782 1
## 3 587.7 2.6 1 0.3615418 0.08777126 1
## 4 587.7 2.8 1 0.3994227 0.08058163 1
## 5 587.7 3.0 1 0.4385464 0.07382864 1
## 6 587.7 3.2 1 0.4784494 0.06869247 1
newdata5 <- within(newdata5, {
LL <- fit - (1.96 * se.fit)
UL <- fit + (1.96 * se.fit)
})
library(ggplot2)
ggplot(newdata5, aes(x = gpa, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) +
geom_line(aes(colour = rank), size = 0.75) +
labs(y = "Predicted probability of being admitted") +
ylim(0, 1)
Use crossing for getting a tibble of all the combinations of given values
library(tidyr)
new_data <- crossing(gre = mean(mydata$gre),
gpa = seq(from = 2.2, to = 4.0, length.out = 10),
rank = factor(1:4))
new_data
## # A tibble: 40 x 3
## gre gpa rank
## <dbl> <dbl> <fct>
## 1 588. 2.2 1
## 2 588. 2.2 2
## 3 588. 2.2 3
## 4 588. 2.2 4
## 5 588. 2.4 1
## 6 588. 2.4 2
## 7 588. 2.4 3
## 8 588. 2.4 4
## 9 588. 2.6 1
## 10 588. 2.6 2
## # … with 30 more rows
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ readr 1.3.1 ✔ stringr 1.4.0
## ✔ purrr 0.3.3 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(broom)
mylogit %>%
augment(newdata = new_data, type.predict = "response")
## # A tibble: 40 x 5
## gre gpa rank .fitted .se.fit
## <dbl> <dbl> <fct> <dbl> <dbl>
## 1 588. 2.2 1 0.291 0.0999
## 2 588. 2.2 2 0.173 0.0613
## 3 588. 2.2 3 0.0970 0.0426
## 4 588. 2.2 4 0.0800 0.0375
## 5 588. 2.4 1 0.325 0.0944
## 6 588. 2.4 2 0.197 0.0584
## 7 588. 2.4 3 0.112 0.0426
## 8 588. 2.4 4 0.0927 0.0387
## 9 588. 2.6 1 0.362 0.0878
## 10 588. 2.6 2 0.224 0.0542
## # … with 30 more rows
mylogit %>%
augment(newdata = new_data, type.predict = "response") %>%
mutate(LL = (.fitted - (1.96 * .se.fit)),
UL = (.fitted + (1.96 * .se.fit))) %>%
ggplot(aes(gpa, .fitted, fill = rank)) +
geom_line() +
labs(y = "Predicted probability of being admitted") +
xlim(2.2,4) +
geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) +
geom_line(aes(colour = rank), size = 0.75) +
ylim(0, 1)
Source: David Robinson, DCR 12/02/2019 https://www.youtube.com/watch?v=NDHSBUN_rVU