In Tennessee, the probability that a county falls in the “high broadband access” group increases as median household income increases. Below is an interactive plot of the logistic regression curve, which shows the predicted probability of being in the “high broadband access” group at every level of income. The higher the income, the greater the probability of being in the high-access group.



Logistic Regression Results
Odds Ratios with 95% Confidence Intervals
term Odds_Ratio CI_Lower CI_Upper P_Value
(Intercept) 0.000 0.000 0.003 0.0000
IV 1.000 1.000 1.000 0.0000

Linearity of the Logit Test (Box-Tidwell)
Interaction term indicates violation if significant
term Estimate Std_Error P_Value
(Intercept) −25.601 9.664 0.0081
IV 0.004 0.002 0.0520
IV_log 0.000 0.000 0.0623

Inflection Point of Logistic Curve
Value of IV where predicted probability = 0.50
Probability Inflection_Point
0.5 46,300.817

Here is the R script that gathered the data and produced the chart:

# Install and load required packages
# ------------------------------
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
if (!require("plotly")) install.packages("plotly")

library(ggplot2)
library(dplyr)
library(gt)
library(gtExtras)
library(plotly)


# ------------------------------
# Load the dataset
# ------------------------------
mydata <- read.csv(file = "YOURFILENAME.csv") # <-- EDIT filename

# ################################################
# # (Optional) Drop specific case(s) by row number
# ################################################
# # Example: drop rows 10 and 25
# rows_to_remove <- c(10, 25) # Edit and uncomment this line
# mydata <- mydata[-rows_to_remove, ] # Uncomment this line

# Copy the chosen columns into the generic names DV and IV that the rest of
# the script refers to.
# DV is passed through as.character() before as.numeric() so that a factor,
# if present, is converted by its labels rather than its internal integer
# codes. Values that cannot be parsed as numbers become NA (with a warning).
mydata$DV <- as.numeric(as.character(mydata$YOURDVNAME))   # <-- EDIT DV column
mydata$IV <- mydata$YOURIVNAME   # <-- EDIT IV column


# ------------------------------
# Plot: observed data with the fitted logistic curve
# ------------------------------
logit_plot <- ggplot(data = mydata, mapping = aes(x = IV, y = DV)) +
  # Observed cases, semi-transparent so overlapping points remain visible
  geom_point(alpha = 0.5) +
  # Binomial GLM fit of DV on IV, drawn without a confidence band
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE,
    color = "#1f78b4"
  ) +
  ggtitle("Logistic Regression Curve") +
  xlab("Independent Variable (IV)") +
  ylab("Dependent Variable (DV)")

# Convert the static ggplot into an interactive plotly widget
logit_plotly <- ggplotly(p = logit_plot)


# ------------------------------
# Fit the logistic regression
# ------------------------------
# A large scipen biases printing toward fixed notation instead of
# scientific notation (e.g. for very small p-values).
options(scipen = 999)
log.ed <- glm(DV ~ IV, data = mydata, family = "binomial")

# Tidy the fit into one row per term. exponentiate = TRUE puts the estimate
# and its confidence bounds on the odds-ratio scale rather than log-odds.
# The renaming select() keeps only the reported columns, in display order.
# NOTE: the odds ratio is per one-unit change in IV; when IV is measured in
# small units the value can round to 1.000 at three decimals.
results <- broom::tidy(log.ed, conf.int = TRUE, exponentiate = TRUE) %>%
  select(
    term,
    Odds_Ratio = estimate,
    CI_Lower = conf.low,
    CI_Upper = conf.high,
    P_Value = p.value
  )

# Render the tidy results as a formatted gt table
results_table <- gt(results) %>%
  tab_header(
    title = "Logistic Regression Results",
    subtitle = "Odds Ratios with 95% Confidence Intervals"
  ) %>%
  fmt_number(columns = P_Value, decimals = 4) %>%
  fmt_number(columns = c(Odds_Ratio, CI_Lower, CI_Upper), decimals = 3)


# ------------------------------
# Check linearity of the logit (Box-Tidwell test)
# ------------------------------
# The test adds the term IV * log(IV) to the model. A significant
# coefficient on that term suggests the logit is not linear in IV.
#
# log() is only defined for IV > 0. A zero gives 0 * -Inf = NaN without any
# warning, and a negative value gives NaN as well. Under the default
# na.action, glm() then drops those rows, so the linearity model would be
# fitted on a different sample than the main model. Say so explicitly
# instead of letting it pass unnoticed; shift IV if this warning appears.
if (any(mydata$IV <= 0, na.rm = TRUE)) {
  warning(
    "Box-Tidwell test assumes IV > 0, but ",
    sum(mydata$IV <= 0, na.rm = TRUE),
    " value(s) of IV are zero or negative. Those rows yield NaN for ",
    "IV * log(IV) and are excluded from the linearity model; shift IV ",
    "so that all values are positive.",
    call. = FALSE
  )
}

mydata$IV_log <- mydata$IV * log(mydata$IV)
linearity_test <- glm(DV ~ IV + IV_log, data = mydata, family = "binomial")

# One row per term, on the log-odds scale (no exponentiation here);
# the IV_log row is the one that carries the Box-Tidwell test.
linearity_results <- broom::tidy(linearity_test) %>%
  select(
    term,
    Estimate = estimate,
    Std_Error = std.error,
    P_Value = p.value
  )

# Render the linearity check as a formatted gt table
linearity_table <- linearity_results %>%
  gt() %>%
  fmt_number(columns = c(Estimate, Std_Error), decimals = 3) %>%
  fmt_number(columns = P_Value, decimals = 4) %>%
  tab_header(
    title = "Linearity of the Logit Test (Box-Tidwell)",
    subtitle = "Interaction term indicates violation if significant"
  )


# ------------------------------
# Calculate the inflection point (p = .50)
# ------------------------------
# Solving log(p / (1 - p)) = b0 + b1 * IV for IV gives the value of IV at
# which the fitted model (log.ed) predicts probability p. At p = 0.50 the
# log-odds are 0, which is the inflection point of the logistic curve;
# keep p at 0.50 if the table is to be read as the inflection point.
p <- 0.50

# Coefficients are looked up by name rather than position, and [[ ]] drops
# the name, so the result is a plain number instead of a value that is
# misleadingly labelled "(Intercept)".
Inflection_point <- (log(p / (1 - p)) - coef(log.ed)[["(Intercept)"]]) /
  coef(log.ed)[["IV"]]

# The Probability column and the subtitle are both derived from p so the
# table cannot disagree with the value actually used in the calculation.
inflection_table <- tibble(
  Probability = p,
  Inflection_Point = Inflection_point
) %>%
  gt() %>%
  fmt_number(columns = Inflection_Point, decimals = 3) %>%
  tab_header(
    title = "Inflection Point of Logistic Curve",
    subtitle = sprintf("Value of IV where predicted probability = %.2f", p)
  )


# ------------------------------
# Outputs
# ------------------------------
# Each object below is a bare top-level expression, so it is displayed
# through R's auto-printing (e.g. in the interactive console).
# NOTE: a plain source() call does not auto-print; use source(echo = TRUE)
# or print.eval = TRUE, or run the lines interactively.

# Interactive plot
logit_plotly

# Tables: odds ratios, Box-Tidwell linearity check, and inflection point
results_table
linearity_table
inflection_table