As median household income increases, the chance that a county has
broadband access will increase in Tennessee. Below is an interactive
plot of the logistic regression curve that shows the “high broadband
access” group at every level of income. The higher the income, the
greater the probability of being in the high-access group.
| Logistic Regression Results |
| Odds Ratios with 95% Confidence Intervals |
| term |
Odds_Ratio |
CI_Lower |
CI_Upper |
P_Value |
| (Intercept) |
0.000 |
0.000 |
0.003 |
0.0000 |
| IV |
1.000 |
1.000 |
1.000 |
0.0000 |
| Linearity of the Logit Test (Box-Tidwell) |
| Interaction term indicates violation if significant |
| term |
Estimate |
Std_Error |
P_Value |
| (Intercept) |
−25.601 |
9.664 |
0.0081 |
| IV |
0.004 |
0.002 |
0.0520 |
| IV_log |
0.000 |
0.000 |
0.0623 |
| Inflection Point of Logistic Curve |
| Value of IV where predicted probability = 0.50 |
| Probability |
Inflection_Point |
| 0.5 |
46,300.817 |
Here is the R script that gathered the data and produced the
chart:
# Install and load required packages
# ------------------------------
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
if (!require("plotly")) install.packages("plotly")
library(ggplot2)
library(dplyr)
library(gt)
library(gtExtras)
library(plotly)
# ------------------------------
# Read the data
# ------------------------------
mydata <- read.csv("YOURFILENAME.csv") # <-- EDIT filename
# ################################################
# # (Optional) Remove specific case(es)s by row number
# ################################################
# # Example: remove rows 10 and 25
# rows_to_remove <- c(10, 25) # Edit and uncomment this line
# mydata <- mydata[-rows_to_remove, ] # Uncomment this line
# Specify dependent (DV) and independent (IV) variables
mydata$DV <- mydata$YOURDVNAME # <-- EDIT DV column
mydata$IV <- mydata$YOURIVNAME # <-- EDIT IV column
# Ensure DV is binary numeric (0/1)
mydata$DV <- as.numeric(as.character(mydata$DV))
# ------------------------------
# Logistic regression plot
# ------------------------------
logit_plot <- ggplot(mydata, aes(x = IV, y = DV)) +
geom_point(alpha = 0.5) + # scatterplot of observed data
geom_smooth(method = "glm",
method.args = list(family = "binomial"),
se = FALSE,
color = "#1f78b4") +
labs(title = "Logistic Regression Curve",
x = "Independent Variable (IV)",
y = "Dependent Variable (DV)")
logit_plotly <- ggplotly(logit_plot)
# ------------------------------
# Run logistic regression
# ------------------------------
options(scipen = 999)
log.ed <- glm(DV ~ IV, data = mydata, family = "binomial")
# Extract coefficients and odds ratios
results <- broom::tidy(log.ed, conf.int = TRUE, exponentiate = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
rename(Odds_Ratio = estimate,
CI_Lower = conf.low,
CI_Upper = conf.high,
P_Value = p.value)
# Display results as a nice gt table
results_table <- results %>%
gt() %>%
fmt_number(columns = c(Odds_Ratio, CI_Lower, CI_Upper), decimals = 3) %>%
fmt_number(columns = P_Value, decimals = 4) %>%
tab_header(
title = "Logistic Regression Results",
subtitle = "Odds Ratios with 95% Confidence Intervals"
)
# ------------------------------
# Check linearity of the logit (Box-Tidwell test)
# ------------------------------
# (Assumes IV > 0; shift IV if needed)
mydata$IV_log <- mydata$IV * log(mydata$IV)
linearity_test <- glm(DV ~ IV + IV_log, data = mydata, family = "binomial")
linearity_results <- broom::tidy(linearity_test) %>%
select(term, estimate, std.error, p.value) %>%
rename(Estimate = estimate,
Std_Error = std.error,
P_Value = p.value)
linearity_table <- linearity_results %>%
gt() %>%
fmt_number(columns = c(Estimate, Std_Error), decimals = 3) %>%
fmt_number(columns = P_Value, decimals = 4) %>%
tab_header(
title = "Linearity of the Logit Test (Box-Tidwell)",
subtitle = "Interaction term indicates violation if significant"
)
# ------------------------------
# Calculate the inflection point (p = .50)
# ------------------------------
p <- 0.50
Inflection_point <- (log(p/(1-p)) - coef(log.ed)[1]) / coef(log.ed)[2]
inflection_table <- tibble(
Probability = 0.5,
Inflection_Point = Inflection_point
) %>%
gt() %>%
fmt_number(columns = Inflection_Point, decimals = 3) %>%
tab_header(
title = "Inflection Point of Logistic Curve",
subtitle = "Value of IV where predicted probability = 0.50"
)
# ------------------------------
# Outputs
# ------------------------------
# Interactive plot
logit_plotly
# Tables
results_table
linearity_table
inflection_table