# enter demo data
sick <- c(0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0)
age <- c(23, 25, 26, 34, 54, 46, 48, 95, 81, 42, 62, 25, 31, 49, 57, 52, 54, 63, 61, 50,
43, 35, 26, 74, 34, 46, 43, 65, 81, 42, 62, 25, 21, 47, 51, 22, 34, 59, 26, 55)
smoke <- c('Former', 'Former', 'Former', 'Never', 'Current', 'Current', 'Current', 'Current', 'Never', 'Former', 'Never', 'Former', 'Current', 'Former', 'Never', 'Current', 'Current', 'Current', 'Former', 'Never','Former', 'Former', 'Former', 'Never', 'Current', 'Current', 'Current', 'Current', 'Never', 'Former', 'Never', 'Former', 'Current', 'Former', 'Never', 'Current', 'Current', 'Current', 'Former', 'Never')
# create data frame
smokeData <- data.frame(sick, age, smoke)Easy Logistic Tables Using logisticTables()
Introduction
The logisticTables() function can be used to create easy knitr tables from the output of a binary logistic regression using glm() and the odds.n.ends package. This function combines summary output from baseR and includes additional output from odds.n.ends to make consistent tables for interpretation of a binary logistic regression.
Dependencies
The logisticTables() function has several dependencies required for successful execution:
Tidyverse(https://www.tidyverse.org/)odds.n.ends(https://cran.r-project.org/web/packages/odds.n.ends/vignettes/oddsnends-vignette.html | https://cran.r-project.org/web/packages/odds.n.ends/index.html)equatiomatic(https://datalorax.github.io/equatiomatic/) - must install from GitHubknitr(https://cran.r-project.org/web/packages/knitr/index.html)pscl(https://cran.r-project.org/web/packages/pscl/index.html)car(https://cran.r-project.org/web/packages/car/index.html)
Users must install these packages using install.packages() prior to use. The logisticTables() function will load the packages if they’re not loaded using library().
Sample Data & Sample GLM
The following sample data was pulled from the odds.n.ends package vignette.
The following code executes the generalized linear model (glm) utilizing the logit function:
glm1 <- glm(formula = sick ~ age + smoke, data = smokeData,
na.action = na.exclude, family = binomial(logit))logisticTables() Arguments and Output
The logisticTables() function requires two arguments:
modelwhere a glm() is specifiedoutputwhere the user selects the output they want to display as akabletable usingknitrsig: Model Significance and Accuracy Tablecoeff: Model Coefficients and Odds Ratioscont: A Contingency Table of Model Accuracyeq: A LaTeX output of the glm (must be using R Markdown or Quarto)eq_coeff: A LaTeX output of the glm with coefficients (must be using R Markdown or Quarto)vif: Extracts a table to check for multicollinearity by the variation inflation factor (VIF) for a GLM.
logisticTables() Function
The following code creates the logisticTables() function. This code may be copied and pasted into your code directly to utilize the function.
library(tidyverse)
library(odds.n.ends)
library(equatiomatic)
library(knitr)
library(pscl)
library(car)
logisticTables <- function(model, output) {
# Create Summary & Odds.n.Ends Summary
mod_sum <- summary(model)
mod_sum2 <- odds.n.ends(model)
# Create LaTeX Equation
if(output == 'eq') {
eq1 <- equatiomatic::extract_eq(model)
return(eq1)
}
if(output == 'eq_coeff') {
eq2 <- equatiomatic::extract_eq(model,use_coefs = TRUE)
return(eq2)
}
# Create Model Significance & Accuracy Table
if(output == 'sig') {
sig_table <- tibble(Variable = c('Chi-Squared','df','p Value','Accuracy','Model Sensitivity','Model Specificity','AIC','McF'),
Value = c(mod_sum2$`Logistic regression model significance`[1],mod_sum2$`Logistic regression model significance`[2],
mod_sum2$`Logistic regression model significance`[3],mod_sum2$`Count R-squared (model fit): percent correctly predicted`[1],
mod_sum2$`Model sensitivity`[1],mod_sum2$`Model specificity`[1],mod_sum$aic[1],pscl::pR2(glm1)[4])) %>%
mutate(Value = as.numeric(Value)) %>%
pivot_wider(names_from = Variable, values_from = Value) %>%
kable(digits = 3, align = 'c', caption = 'Logistic Regression Model Significance & Accuracy',col.names = c('Chi-squared ($\\chi^2$)','df','p Value','Accuracy','Sensitivity','Specificity','AIC','$\\ R^2_{McF}$'))
return(sig_table)
}
# Create Model Coefficients Table
if(output == 'coeff') {
mod_sum <- summary(model)
mod_sum2 <- odds.n.ends(model)
sum_rownnames1 <- rownames(mod_sum$coefficients)
sum_rownames2 <- rownames(mod_sum2$`Predictor odds ratios and 95% CI`)
mod_sum2_table <- as_tibble(mod_sum2$`Predictor odds ratios and 95% CI`) %>%
mutate(variable = sum_rownames2)
coeff_table <- as_tibble(mod_sum$coefficients) %>%
mutate(variable = sum_rownnames1) %>%
left_join(mod_sum2_table, by = 'variable') %>%
mutate(OR_perc = paste(round((OR-1)*100,2),'%',sep='')) %>%
select(variable, Estimate,`Std. Error`,`z value`,`Pr(>|z|)`,OR,OR_perc,`2.5 %`,`97.5 %`) %>%
kable(align = 'lcccccccc', col.names =
c('','$\\beta$','SE','z Value','p Value','Odds Ratio','OR%',' OR LCI', 'OR UCI'),
digits = 3, caption = 'Model Coefficients & Odds Ratios')
return(coeff_table)
}
# Create Contingency Table
if(output == 'cont') {
cont_table <- mod_sum2$`Contingency tables (model fit): frequency predicted` %>%
kable(caption = 'Contingency Table: Number of Predicted (Rows) | Number Observed (Columns)')
return(cont_table)
}
# Extract ROC Curve
if(output == 'roc') {
roc <- odds.n.ends(glm1,rocPlot = TRUE)
}
if(output == 'vif') {
vif <- car::vif(mod = model)
vif_rownames <- rownames(vif)
vif %>%
as_tibble() %>%
mutate(Variable = vif_rownames) %>%
relocate(Variable, before = 'GVIF') %>%
mutate(`GVIF^(1/(2*Df))` = ifelse(`GVIF^(1/(2*Df))` >= 2.5, paste(`GVIF^(1/(2*Df))`,'*',sep = ''),`GVIF^(1/(2*Df))`)) %>%
kable(align = 'lccc', col.names = c('Variable', 'GVIF', 'Df', '$\\ GVIF^{\\frac1{2*Df}}$'),
caption = 'Multicollinearity check using the variation inflation factor (VIF) for a GLM. The GVIF should be < 2.5. Issues with multicollinearity are flagged with an astrisk (*)')
}
}Model Equations
Model equations are extracted as LaTeX output using the equatiomatic package. Calls for these functions were added to the logisticTables() function for ease of access. Advanced features can be accessed by using the equatiomatic package directly.
Model Equation With Coefficients
logisticTables(model = glm1, output = 'eq')\[ \log\left[ \frac { P( \operatorname{sick} = \operatorname{1} ) }{ 1 - P( \operatorname{sick} = \operatorname{1} ) } \right] = \alpha + \beta_{1}(\operatorname{age}) + \beta_{2}(\operatorname{smoke}_{\operatorname{Former}}) + \beta_{3}(\operatorname{smoke}_{\operatorname{Never}}) \]
Model Equation With Coefficients
logisticTables(model = glm1, output = 'eq_coeff')\[ \log\left[ \frac { \widehat{P( \operatorname{sick} = \operatorname{1} )} }{ 1 - \widehat{P( \operatorname{sick} = \operatorname{1} )} } \right] = -3.29 + 0.1(\operatorname{age}) - 1.13(\operatorname{smoke}_{\operatorname{Former}}) - 2.47(\operatorname{smoke}_{\operatorname{Never}}) \]
Model Significance & Accuracy Table
logisticTables(model = glm1, output = 'sig')fitting null model for pseudo-r2
| Chi-squared (\(\chi^2\)) | df | p Value | Accuracy | Sensitivity | Specificity | AIC | \(\ R^2_{McF}\) |
|---|---|---|---|---|---|---|---|
| 16.652 | 3 | 0.001 | 80 | 0.826 | 0.765 | 45.896 | 0.305 |
Model Coefficients & Odds Ratios
logisticTables(model = glm1, output = 'coeff')| \(\beta\) | SE | z Value | p Value | Odds Ratio | OR% | OR LCI | OR UCI | |
|---|---|---|---|---|---|---|---|---|
| (Intercept) | -3.286 | 1.588 | -2.070 | 0.038 | 0.037 | -96.26% | 0.001 | 0.661 |
| age | 0.104 | 0.037 | 2.814 | 0.005 | 1.110 | 11.01% | 1.041 | 1.208 |
| smokeFormer | -1.125 | 0.947 | -1.189 | 0.235 | 0.325 | -67.55% | 0.046 | 2.054 |
| smokeNever | -2.472 | 1.251 | -1.976 | 0.048 | 0.084 | -91.56% | 0.005 | 0.816 |
logisticTables(model = glm1, output = 'roc')
Check for Multicollinearity
logisticTables(model = glm1, output = 'vif')| Variable | GVIF | Df | \(\ GVIF^{\frac1{2*Df}}\) |
|---|---|---|---|
| age | 1.674018 | 1 | 1.293838 |
| smoke | 1.674018 | 2 | 1.137470 |