library(knitr)
library(janitor)
library(broom)
library(tidyverse)
Create a data set
set.seed(123)
outcome <- c(rep("Yes", 40), rep("No", 60))
predA <- rnorm(100, mean = 0, sd = 1)
predB <- rbinom(100, 1, 0.3)
predX <- c(rep("Low", 10), rep("Middle", 15),
rep("High", 50), rep("Middle", 25))
temp_data <- tibble(outcome, predA, predB, predX) %>%
mutate(predX = fct_relevel(
factor(predX), "Low", "Middle", "High"))
Fit logistic regression model for outcome
with A and B
mod01 <- glm(outcome == "Yes" ~ predA + predB,
family = binomial, data = temp_data)
tidy(mod01, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, std.error, conf.low, conf.high) %>%
kable(digits = 3)
(Intercept) |
0.736 |
0.241 |
0.456 |
1.177 |
predA |
0.915 |
0.226 |
0.582 |
1.426 |
predB |
0.722 |
0.459 |
0.286 |
1.751 |
Fit logistic regression model for outcome
adding in X
mod02 <- glm(outcome == "Yes" ~ predA + predB + predX,
family = binomial, data = temp_data)
tidy(mod02, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, std.error, conf.low, conf.high) %>%
kable(digits = 3)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
(Intercept) |
45567353.559 |
1245.579 |
0.000 |
NA |
predA |
0.892 |
0.249 |
0.542 |
1.452000e+00 |
predB |
0.767 |
0.500 |
0.276 |
2.002000e+00 |
predXMiddle |
0.000 |
1245.579 |
NA |
4.149098e+33 |
predXHigh |
0.000 |
1245.579 |
NA |
3.311521e+33 |
Why does this explosion in the standard error happen? Why are we getting all of these warnings?
temp_data %>% tabyl(outcome, predX)
outcome Low Middle High
No 0 25 35
Yes 10 15 15
If predX
is Low, we know that outcome
= Yes.
How can we fix this?
Collapse the predX
categories, or redefine them in some other way.
temp_data <- temp_data %>%
mutate(newpredX = fct_recode(predX,
"Not_High" = "Low",
"Not_High" = "Middle",
"High" = "High"))
temp_data %>% tabyl(outcome, newpredX)
outcome Not_High High
No 25 35
Yes 25 15
Fit logistic regression for outcome
adding in new X
mod03 <- glm(outcome == "Yes" ~ predA + predB + newpredX,
family = binomial, data = temp_data)
tidy(mod03, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, std.error, conf.low, conf.high) %>%
kable(digits = 3)
(Intercept) |
1.128 |
0.317 |
0.605 |
2.120 |
predA |
0.897 |
0.231 |
0.565 |
1.411 |
predB |
0.699 |
0.469 |
0.271 |
1.728 |
newpredXHigh |
0.420 |
0.421 |
0.180 |
0.948 |
LS0tDQp0aXRsZTogIlRveSBFeGFtcGxlIHdpdGggRXhwbG9zaXZlIENvZWZmaWNpZW50cyINCmF1dGhvcjogIlRob21hcyBFLiBMb3ZlIg0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiANCiAgICBodG1sX2RvY3VtZW50Og0KICAgICAgICBjb2RlX2ZvbGRpbmc6IHNob3cNCiAgICAgICAgY29kZV9kb3dubG9hZDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGNvbW1lbnQgPSBOQSkNCmBgYA0KDQpgYGB7ciwgbWVzc2FnZSA9IEZBTFNFfQ0KbGlicmFyeShrbml0cikNCmxpYnJhcnkoamFuaXRvcikNCmxpYnJhcnkoYnJvb20pDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmBgYA0KDQojIyBDcmVhdGUgYSBkYXRhIHNldA0KDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCm91dGNvbWUgPC0gYyhyZXAoIlllcyIsIDQwKSwgcmVwKCJObyIsIDYwKSkNCnByZWRBIDwtIHJub3JtKDEwMCwgbWVhbiA9IDAsIHNkID0gMSkNCnByZWRCIDwtIHJiaW5vbSgxMDAsIDEsIDAuMykNCnByZWRYIDwtIGMocmVwKCJMb3ciLCAxMCksIHJlcCgiTWlkZGxlIiwgMTUpLCANCiAgICAgICAgICAgcmVwKCJIaWdoIiwgNTApLCByZXAoIk1pZGRsZSIsIDI1KSkNCg0KdGVtcF9kYXRhIDwtIHRpYmJsZShvdXRjb21lLCBwcmVkQSwgcHJlZEIsIHByZWRYKSAlPiUNCiAgICBtdXRhdGUocHJlZFggPSBmY3RfcmVsZXZlbCgNCiAgICAgICAgZmFjdG9yKHByZWRYKSwgIkxvdyIsICJNaWRkbGUiLCAiSGlnaCIpKQ0KYGBgDQoNCiMjIEZpdCBsb2dpc3RpYyByZWdyZXNzaW9uIG1vZGVsIGZvciBgb3V0Y29tZWAgd2l0aCBBIGFuZCBCDQoNCmBgYHtyfQ0KbW9kMDEgPC0gZ2xtKG91dGNvbWUgPT0gIlllcyIgfiBwcmVkQSArIHByZWRCLCANCiAgICAgICAgICAgICBmYW1pbHkgPSBiaW5vbWlhbCwgZGF0YSA9IHRlbXBfZGF0YSkNCnRpZHkobW9kMDEsIGV4cG9uZW50aWF0ZSA9IFRSVUUsIGNvbmYuaW50ID0gVFJVRSkgJT4lDQogICAgc2VsZWN0KHRlcm0sIGVzdGltYXRlLCBzdGQuZXJyb3IsIGNvbmYubG93LCBjb25mLmhpZ2gpICU+JQ0KICAgIGthYmxlKGRpZ2l0cyA9IDMpDQpgYGANCg0KIyMgRml0IGxvZ2lzdGljIHJlZ3Jlc3Npb24gbW9kZWwgZm9yIGBvdXRjb21lYCBhZGRpbmcgaW4gWA0KDQpgYGB7cn0NCm1vZDAyIDwtIGdsbShvdXRjb21lID09ICJZZXMiIH4gcHJlZEEgKyBwcmVkQiArIHByZWRYLCANCiAgICAgICAgICAgICBmYW1pbHkgPSBiaW5vbWlhbCwgZGF0YSA9IHRlbXBfZGF0YSkNCmBgYA0KDQpgYGB7cn0NCnRpZHkobW9kMDIsIGV4cG9uZW50aWF0ZSA9IFRSVUUsIGNvbmYuaW50ID0gVFJVRSkgJT4lDQogICAgc2VsZWN0KHRlcm0sIGVzdGltYXRlLCBzdGQuZXJyb3IsIGNvbmYubG93LCBjb25mLmhpZ2gpICU+JQ0KICAgIGthYmxlKGRpZ2l0cyA9IDMpDQpgYGANCg0KIyMgV2h5IGRvZXMgdGhpcyBleHBsb3Npb24gaW4gdGhlIHN0YW5kYXJkIGVycm9yIGhhcHBlbj8gV2h5IGFyZSB3ZSBnZXR0aW5nIGFsbCBvZiB0aGVzZSB3YXJuaW5ncz8NCg0KYGBge3J9DQp0ZW1wX2RhdGEgJT4lIHRhYnlsKG91dGNvbWUsIHByZWRYKQ0KYGBgDQoNCklmIGBwcmVkWGAgaXMgTG93LCB3ZSBrbm93IHRoYXQgYG91dGNvbWVgID0gWWVzLg0KDQojIyBIb3cgY2FuIHdlIGZpeCB0aGlzPw0KDQpDb2xsYXBzZSB0aGUgYHByZWRYYCBjYXRlZ29yaWVzLCBvciByZWRlZmluZSB0aGVtIGluIHNvbWUgb3RoZXIgd2F5Lg0KDQpgYGB7cn0NCnRlbXBfZGF0YSA8LSB0ZW1wX2RhdGEgJT4lDQogICAgbXV0YXRlKG5ld3ByZWRYID0gZmN0X3JlY29kZShwcmVkWCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiTm90X0hpZ2giID0gIkxvdyIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiTm90X0hpZ2giID0gIk1pZGRsZSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiSGlnaCIgPSAiSGlnaCIpKQ0KYGBgDQoNCmBgYHtyfQ0KdGVtcF9kYXRhICU+JSB0YWJ5bChvdXRjb21lLCBuZXdwcmVkWCkNCmBgYA0KDQojIyBGaXQgbG9naXN0aWMgcmVncmVzc2lvbiBmb3IgYG91dGNvbWVgIGFkZGluZyBpbiBuZXcgWA0KDQpgYGB7cn0NCm1vZDAzIDwtIGdsbShvdXRjb21lID09ICJZZXMiIH4gcHJlZEEgKyBwcmVkQiArIG5ld3ByZWRYLCANCiAgICAgICAgICAgICBmYW1pbHkgPSBiaW5vbWlhbCwgZGF0YSA9IHRlbXBfZGF0YSkNCmBgYA0KDQpgYGB7cn0NCnRpZHkobW9kMDMsIGV4cG9uZW50aWF0ZSA9IFRSVUUsIGNvbmYuaW50ID0gVFJVRSkgJT4lDQogICAgc2VsZWN0KHRlcm0sIGVzdGltYXRlLCBzdGQuZXJyb3IsIGNvbmYubG93LCBjb25mLmhpZ2gpICU+JQ0KICAgIGthYmxlKGRpZ2l0cyA9IDMpDQpgYGA=