TNBB Access Data Logistic Regression

As median household income increases, the chance that a county has broadband access will increase in Tennessee. Below is an interactive plot of the logistic regression curve that shows the “high broadband access” group at every level of income. The higher the income, the greater the probability of being in the high-access group.

term	Odds_Ratio	CI_Lower	CI_Upper	P_Value
Logistic Regression Results
Odds Ratios with 95% Confidence Intervals
(Intercept)	0.000	0.000	0.003	0.0000
IV	1.000	1.000	1.000	0.0000

term	Estimate	Std_Error	P_Value
Linearity of the Logit Test (Box-Tidwell)
Interaction term indicates violation if significant
(Intercept)	−25.601	9.664	0.0081
IV	0.004	0.002	0.0520
IV_log	0.000	0.000	0.0623

Probability	Inflection_Point
Inflection Point of Logistic Curve
Value of IV where predicted probability = 0.50
0.5	46,300.817

Here is the R script that gathered the data and produced the chart:

# Install and load required packages
# ------------------------------
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
if (!require("plotly")) install.packages("plotly")

library(ggplot2)
library(dplyr)
library(gt)
library(gtExtras)
library(plotly)


# ------------------------------
# Read the data
# ------------------------------
mydata <- read.csv("YOURFILENAME.csv") # <-- EDIT filename

# ################################################
# # (Optional) Remove specific case(es)s by row number
# ################################################
# # Example: remove rows 10 and 25
# rows_to_remove <- c(10, 25) # Edit and uncomment this line
# mydata <- mydata[-rows_to_remove, ] # Uncomment this line

# Specify dependent (DV) and independent (IV) variables
mydata$DV <- mydata$YOURDVNAME   # <-- EDIT DV column
mydata$IV <- mydata$YOURIVNAME   # <-- EDIT IV column

# Ensure DV is binary numeric (0/1)
mydata$DV <- as.numeric(as.character(mydata$DV))


# ------------------------------
# Logistic regression plot 
# ------------------------------
logit_plot <- ggplot(mydata, aes(x = IV, y = DV)) +
  geom_point(alpha = 0.5) +   # scatterplot of observed data
  geom_smooth(method = "glm",
              method.args = list(family = "binomial"),
              se = FALSE,
              color = "#1f78b4") +
  labs(title = "Logistic Regression Curve",
       x = "Independent Variable (IV)",
       y = "Dependent Variable (DV)")

logit_plotly <- ggplotly(logit_plot)


# ------------------------------
# Run logistic regression
# ------------------------------
options(scipen = 999)
log.ed <- glm(DV ~ IV, data = mydata, family = "binomial")

# Extract coefficients and odds ratios
results <- broom::tidy(log.ed, conf.int = TRUE, exponentiate = TRUE) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  rename(Odds_Ratio = estimate,
         CI_Lower = conf.low,
         CI_Upper = conf.high,
         P_Value = p.value)

# Display results as a nice gt table
results_table <- results %>%
  gt() %>%
  fmt_number(columns = c(Odds_Ratio, CI_Lower, CI_Upper), decimals = 3) %>%
  fmt_number(columns = P_Value, decimals = 4) %>%
  tab_header(
    title = "Logistic Regression Results",
    subtitle = "Odds Ratios with 95% Confidence Intervals"
  )


# ------------------------------
# Check linearity of the logit (Box-Tidwell test)
# ------------------------------
# (Assumes IV > 0; shift IV if needed)
mydata$IV_log <- mydata$IV * log(mydata$IV)
linearity_test <- glm(DV ~ IV + IV_log, data = mydata, family = "binomial")

linearity_results <- broom::tidy(linearity_test) %>%
  select(term, estimate, std.error, p.value) %>%
  rename(Estimate = estimate,
         Std_Error = std.error,
         P_Value = p.value)

linearity_table <- linearity_results %>%
  gt() %>%
  fmt_number(columns = c(Estimate, Std_Error), decimals = 3) %>%
  fmt_number(columns = P_Value, decimals = 4) %>%
  tab_header(
    title = "Linearity of the Logit Test (Box-Tidwell)",
    subtitle = "Interaction term indicates violation if significant"
  )


# ------------------------------
# Calculate the inflection point (p = .50)
# ------------------------------
p <- 0.50
Inflection_point <- (log(p/(1-p)) - coef(log.ed)[1]) / coef(log.ed)[2]

inflection_table <- tibble(
  Probability = 0.5,
  Inflection_Point = Inflection_point
) %>%
  gt() %>%
  fmt_number(columns = Inflection_Point, decimals = 3) %>%
  tab_header(
    title = "Inflection Point of Logistic Curve",
    subtitle = "Value of IV where predicted probability = 0.50"
  )


# ------------------------------
# Outputs
# ------------------------------
# Interactive plot
logit_plotly

# Tables
results_table
linearity_table
inflection_table

TNBB Access Data Logistic Regression

Marvin Rutherford Jr