Bayesian Binary Logistic Regression: Theory, Implementation, and Applications

1 Implementation Using Simulated Dataset

# Simulation parameters
n <- 500  # Sample size
beta_true <- c(-1.5, 0.8, -0.5)  # True parameters: intercept, x1, x2

# Generate predictors
x1 <- rnorm(n)
x2 <- rnorm(n)
X <- cbind(1, x1, x2)

# Generate latent variables and binary responses
z <- X %*% beta_true + rlogis(n)
y <- as.numeric(z > 0)

# Sample from truncated logistic using rejection sampling
rtrunc_logis <- function(n, location = 0, scale = 1, a = -Inf, b = Inf) {
  samples <- numeric(n)
  for (i in 1:n) {
    repeat {
      x <- rlogis(1, location = location, scale = scale)
      if (x >= a && x <= b) {
        samples[i] <- x
        break
      }
    }
  }
  return(samples)
}

# Gibbs sampler function
gibbs_logit <- function(y, X, m0, V0, n_iter = 10000, burnin = 2000) {
  n <- nrow(X)
  p <- ncol(X)
  
  # Initialize storage
  beta_samples <- matrix(NA, n_iter, p)
  z_samples <- matrix(NA, n_iter, n)
  
  # Initialize parameters
  beta <- solve(t(X) %*% X) %*% t(X) %*% (y - 0.5)  # Starting value
  
  # Precision matrix
  V0_inv <- solve(V0)
  Vn_inv <- V0_inv + t(X) %*% X
  Vn <- solve(Vn_inv)
  
  for (t in 1:n_iter) {
    # Sample latent variables z
    eta <- X %*% beta
    z <- numeric(n)
    for (i in 1:n) {
      if (y[i] == 1) {
        z[i] <- rtrunc_logis(1, location = eta[i], scale = 1, a = 0, b = Inf)
      } else {
        z[i] <- rtrunc_logis(1, location = eta[i], scale = 1, a = -Inf, b = 0)
      }
    }
    
    # Sample beta
    mn <- Vn %*% (V0_inv %*% m0 + t(X) %*% z)
    beta <- c(mvtnorm::rmvnorm(1, mean = mn, sigma = Vn))
    
    # Store samples
    beta_samples[t, ] <- beta
    z_samples[t, ] <- z
  }
  
  # Discard burn-in
  beta_samples <- beta_samples[(burnin + 1):n_iter, ]
  
  return(list(beta = beta_samples, z = z_samples))
}

# Run Gibbs sampler
sim_results <- gibbs_logit(
  y = y, 
  X = X, 
  m0 = c(0, 0, 0), 
  V0 = diag(100, 3),
  n_iter = 20000,
  burnin = 2000
)

# Posterior summary
sim_beta <- sim_results$beta
colnames(sim_beta) <- c("Intercept", "x1", "x2")

sim_summary <- data.frame(
  Parameter = colnames(sim_beta),
  True_Value = beta_true,
  Mean = colMeans(sim_beta),
  SD = apply(sim_beta, 2, sd),
  HDI_2.5 = apply(sim_beta, 2, function(x) hdi(x)[1]),
  HDI_97.5 = apply(sim_beta, 2, function(x) hdi(x)[2]),
  row.names = NULL
)

kable(sim_summary, digits = 2, caption = "Simulation Results: Parameter Recovery") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Simulation Results: Parameter Recovery
Parameter	True_Value	Mean	SD	HDI_2.5	HDI_97.5
Intercept	-1.5	-1.58	0.09	-1.76	-1.40
x1	0.8	0.62	0.10	0.43	0.82
x2	-0.5	-0.52	0.09	-0.69	-0.33

1.1 Diagnostic Procedures

1.1.1 Convergence Diagnostics

1.1.1.1 Trace Plots

# Convert to mcmc object
mcmc_obj <- as.mcmc(sim_beta)

# Create named vector for true values
names(beta_true) <- colnames(sim_beta)

# Generate trace plot
mcmc_trace(mcmc_obj) +
  ggtitle("Trace Plots for Simulated Data") +
  theme_minimal() +
  theme(legend.position = "none") +
  geom_hline(
    data = data.frame(parameter = names(beta_true), value = beta_true),
    aes(yintercept = value),
    linetype = "dashed",
    color = "red",
    inherit.aes = FALSE
  )

1.1.1.2 Autocorrelation Function (ACF)

# Convert to matrix
sim_mat <- as.matrix(sim_beta)

# Use the actual column names
params <- c("Intercept", "x1", "x2")

# Confidence interval limits
n_iter <- nrow(sim_mat)
ci_limit <- qnorm(0.975) / sqrt(n_iter)

# Extract ACF for each parameter and store in one data frame
acf_list <- map_dfr(params, function(p) {
  acf_obj <- acf(sim_mat[, p], lag.max = 20, plot = FALSE)
  data.frame(
    Lag = acf_obj$lag[, 1, 1],
    ACF = acf_obj$acf[, 1, 1],
    Parameter = p
  )
})

# Plot using ggplot2
ggplot(acf_list, aes(x = Lag, y = ACF)) +
  geom_bar(stat = "identity", width = 0.2, fill = "steelblue") +
  geom_hline(yintercept = 0, color = "black") +
  geom_hline(yintercept = c(-ci_limit, ci_limit), linetype = "dashed", color = "red") +
  facet_wrap(~Parameter, scales = "free_y") +
  theme_minimal() +
  labs(title = "Autocorrelation Plots", y = "Autocorrelation")

1.1.1.3 Geweke Diagnostics

geweke <- geweke.diag(as.mcmc(sim_beta))

# Extract the z-scores
z_scores <- geweke$z

# Compute p-values
p_values <- 2 * (1 - pnorm(abs(z_scores)))

# Combine into a data frame
geweke_res <- data.frame(
  Parameter = names(z_scores),
  Z_score = round(z_scores, 4),
  P_value = round(p_values, 4),
  row.names = NULL
)

# Print the table with kable
cat("**Geweke Diagnostic Results**\n")

## **Geweke Diagnostic Results**

kable(geweke_res, format = "markdown", align = "lcc")

Parameter	Z_score	P_value
Intercept	0.2609	0.7942
x1	-0.4915	0.6231
x2	0.2780	0.7810

1.1.1.4 Inefficiency Factors

# Define inefficiency factor function
inefficiency_factor <- function(chain) {
  acf_vals <- acf(chain, plot = FALSE, na.action = na.pass)$acf
  if (length(acf_vals) > 1) 1 + 2 * sum(acf_vals[-1]) else 1
}

# Apply to each parameter in sim_beta
if_factors <- apply(sim_beta, 2, inefficiency_factor)

# Create data frame with rounded values
if_df <- data.frame(
  Parameter = colnames(sim_beta),
  Inefficiency_Factor = round(if_factors, 4),
  row.names = NULL
)

# Print using kable
cat("**Inefficiency Factors (Autocorrelation Time Estimates)**\n")

## **Inefficiency Factors (Autocorrelation Time Estimates)**

kable(if_df, format = "markdown", align = "lc")

Parameter	Inefficiency_Factor
Intercept	4.8584
x1	4.2157
x2	3.8616

1.1.2 Model Comparison: Deviance Information Criterion (DIC)

compute_dic <- function(model, X, y) {
  dev <- function(beta) {
    eta <- X %*% beta
    p <- plogis(eta)
    -2 * sum(dbinom(y, 1, p, log = TRUE))
  }
  dev_samples <- apply(model, 1, dev)
  D_bar <- mean(dev_samples)
  theta_hat <- colMeans(model)
  D_hat <- dev(theta_hat)
  pD <- D_bar - D_hat
  c(DIC = D_bar + pD, pD = pD, Dbar = D_bar)
}

dic_results <- compute_dic(sim_beta, X, y)
kable(t(dic_results), caption = "DIC Results for Simulated Data") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

DIC Results for Simulated Data
DIC	pD	Dbar
456.2551	1.688558	454.5666

1.2 Sensitivity Analysis

library(ggridges)

# Define prior variances
prior_vars <- c(1, 10, 100, 1000)

# Simulate and stack results
sensitivity_results <- map_dfr(prior_vars, function(pvar) {
  res <- gibbs_logit(
    y = y, 
    X = X, 
    m0 = c(0, 0, 0), 
    V0 = diag(pvar, 3),
    n_iter = 20000,
    burnin = 2000
  )
  
  beta_post <- res$beta
  data.frame(
    Prior_Variance = pvar,
    Parameter = rep(c("Intercept", "x1", "x2"), each = nrow(beta_post)),
    Value = c(beta_post)
  )
})

# True beta parameters
beta_true <- c(Intercept = -1.5, x1 = 0.8, x2 = -0.5)

# Summary function for 95% credible interval and median
ci_summary <- function(x) {
  qs <- quantile(x, probs = c(0.025, 0.5, 0.975))
  data.frame(ymin = qs[1], y = qs[2], ymax = qs[3])
}

# Plot
ggplot(sensitivity_results, aes(x = factor(Prior_Variance), y = Value)) +
  stat_summary(fun.data = ci_summary, geom = "pointrange", color = "blue") +
  facet_wrap(~ Parameter, scales = "free_y") +
  geom_hline(data = data.frame(
    Parameter = names(beta_true),
    beta_true = beta_true
  ), aes(yintercept = beta_true), color = "red", linetype = "dashed") +
  labs(
    title = "Prior Sensitivity Analysis with 95% Credible Intervals",
    x = "Prior Variance",
    y = "Posterior Median and 95% CI"
  ) +
  theme_minimal()

2 Application: Bayesian Analysis of Benthic Macroinvertebrate (BMI) Presence

2.1 Data

# Load and prepare the BMI dataset
bmi_data <- read.csv("BMI.csv") %>% 
  mutate(
    Presence = as.integer(Presence),
    across(starts_with("Water_"), ~as.numeric(scale(.x)))  # Standardize
  )

# Prepare analysis data for modeling
analysis_data <- bmi_data %>% 
  select(Species, Presence, starts_with("Water_")) %>%
  select(-Water_FlowVelocity, -Water_DO, -Water_Phosphate)

# Water_ values rounded to 4 decimals
table_data <- analysis_data %>% 
  mutate(across(starts_with("Water_"), ~round(.x, 4))) %>% 
  head()

# Show the table
kable(table_data, caption = "First 6 Observations")

First 6 Observations
Species	Presence	Water_ChannelDepth	Water_Temp	Water_pH	Water_Salinity	Water_TSS	Water_BOD	Water_Fecal
Chironomidae	1	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563
Baetidae	0	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563
Caenidae	0	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563
Hydropsychidae	0	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563
Muscidae	0	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563
Isonychiidae	0	2.5591	0.8475	-1.4939	2.58	2.0534	-0.4295	1.0563

2.2 Bayesian Analysis

# Run Bayesian logistic regression
bmi_model <- MCMClogit(
  Presence ~ Water_ChannelDepth+Water_Temp+Water_pH+Water_Salinity+Water_TSS+Water_BOD+Water_Fecal,
  data = analysis_data,
  b0 = 0,          # Prior mean vector
  B0 = 0.1,        # Prior precision (1/variance) where variance=10
  mcmc = 50000, 
  burnin = 5000
)

# Convert to coda object for diagnostics
bmi_coda <- as.mcmc(bmi_model)

# Posterior summary
post_summary <- summary(bmi_coda)
HDI_intervals <- hdi(bmi_coda)

# Create results table
results_df <- data.frame(
  Parameter = c("Intercept", colnames(bmi_model)[-1]),
  Mean = post_summary$statistics[,1],
  SD = post_summary$statistics[,2],
  HDI_2.5 = HDI_intervals["lower",],
  HDI_97.5 = HDI_intervals["upper",],
  P_effect_gt0 = apply(bmi_model, 2, function(x) mean(x > 0))
)

# Format table
kable(results_df, digits = 3, 
      caption = "Posterior Summary: Water Quality Effects on Presence") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(6, background = ifelse(results_df$P_effect_gt0 > 0.975 | 
                                     results_df$P_effect_gt0 < 0.025, 
                                   "#FFDDDD", "none"))

Posterior Summary: Water Quality Effects on Presence
	Parameter	Mean	SD	HDI_2.5	HDI_97.5	P_effect_gt0
(Intercept)	Intercept	-1.101	0.065	-1.231	-0.975	0.000
Water_ChannelDepth	Water_ChannelDepth	0.182	0.111	-0.034	0.401	0.953
Water_Temp	Water_Temp	-0.174	0.119	-0.399	0.059	0.069
Water_pH	Water_pH	0.034	0.121	-0.201	0.266	0.610
Water_Salinity	Water_Salinity	-0.125	0.152	-0.431	0.163	0.208
Water_TSS	Water_TSS	-0.455	0.131	-0.711	-0.198	0.000
Water_BOD	Water_BOD	0.096	0.082	-0.068	0.255	0.880
Water_Fecal	Water_Fecal	0.386	0.112	0.162	0.604	1.000

2.3 Diagnostic Checks

2.3.1 Convergence Assessments

Trace Plots

library(bayesplot)
library(ggplot2)

# Extract posterior means
posterior_means <- colMeans(as.matrix(bmi_coda))

# Create a data frame to use for adding horizontal lines per facet
mean_lines_df <- data.frame(
  parameter = names(posterior_means),
  mean = posterior_means
)

# Trace plot with posterior mean as reference line per facet
mcmc_trace(bmi_coda) +
  geom_hline(data = mean_lines_df, aes(yintercept = mean), 
             linetype = "dashed", color = "red") +
  ggtitle("Trace Plots with Posterior Means") +
  theme_minimal() +
  theme(legend.position = "none")

2.3.2 Quantitative Diagnostics

Geweke Test

geweke_results <- geweke.diag(bmi_coda)

# Extract the z-scores
Zscores <- geweke_results$z

# Compute p-values
Pvalues <- 2 * (1 - pnorm(abs(Zscores)))

# Combine into a data frame
geweke_resu <- data.frame(
  Parameter = names(Zscores),
  Z_score = round(Zscores, 4),
  P_value = round(Pvalues, 4),
  row.names = NULL
)

# Print the table with kable
cat("**Geweke Diagnostic Results**\n")

## **Geweke Diagnostic Results**

kable(geweke_resu, format = "markdown", align = "lcc")

Parameter	Z_score	P_value
(Intercept)	0.9900	0.3222
Water_ChannelDepth	0.3777	0.7056
Water_Temp	1.3222	0.1861
Water_pH	-0.2063	0.8366
Water_Salinity	-1.6729	0.0943
Water_TSS	0.4652	0.6418
Water_BOD	-1.5982	0.1100
Water_Fecal	0.1164	0.9073

Inefficiency Factors

inefficiency_factor <- function(chain) {
  acf_vals <- acf(chain, plot = FALSE, na.action = na.pass)$acf
  if (length(acf_vals) > 1) 1 + 2 * sum(acf_vals[-1]) else 1
}

# Apply to each parameter in bmi_coda
if_factorsAp <- apply(bmi_coda, 2, inefficiency_factor)

# Create data frame with rounded values
if_dfAp <- data.frame(
  Parameter = colnames(bmi_coda),
  Inefficiency_Factor = round(if_factorsAp, 4),
  row.names = NULL
)

# Print using kable
cat("**Inefficiency Factors (Autocorrelation Time Estimates)**\n")

## **Inefficiency Factors (Autocorrelation Time Estimates)**

kable(if_dfAp, format = "markdown", align = "lc")

Parameter	Inefficiency_Factor
(Intercept)	27.3402
Water_ChannelDepth	26.4920
Water_Temp	27.1102
Water_pH	27.2884
Water_Salinity	29.0480
Water_TSS	26.0537
Water_BOD	26.1459
Water_Fecal	26.4845

2.4 Sensitivity Analysis

2.4.1 Prior Variance Sensitivity

library(MCMCpack)
library(tidyverse)

# Fit baseline model for reference line
baseline_model <- MCMClogit(
  Presence ~ Water_ChannelDepth + Water_Temp + Water_pH +  
             Water_Salinity + Water_TSS + Water_BOD + Water_Fecal,
  data = analysis_data,
  b0 = 0,
  B0 = 0.1,  # Equivalent to prior variance = 10
  mcmc = 50000,
  burnin = 5000
)

# Get posterior means for reference lines
baseline_means <- colMeans(as.matrix(baseline_model))  # Named vector
reference_lines <- data.frame(
  Parameter = names(baseline_means),
  Mean = baseline_means
)

# Run models for various prior variances
prior_vars <- c(1, 10, 100, 1000)

sensitivity_res <- map_dfr(prior_vars, function(pvar) {
  model <- MCMClogit(
    Presence ~ Water_ChannelDepth + Water_Temp + Water_pH +  
               Water_Salinity + Water_TSS + Water_BOD + Water_Fecal,
    data = analysis_data,
    b0 = 0,
    B0 = 1 / pvar,  # precision = 1 / variance
    mcmc = 20000,
    burnin = 2000
  )
  
  draws <- as.data.frame(model)
  draws$Prior_Variance <- pvar
  draws_long <- pivot_longer(draws, 
                              cols = -Prior_Variance, 
                              names_to = "Parameter", 
                              values_to = "Value")
  draws_long
})

# Summary function for 95% CI and median
ci_summary <- function(x) {
  qs <- quantile(x, probs = c(0.025, 0.5, 0.975))
  data.frame(ymin = qs[1], y = qs[2], ymax = qs[3])
}

# Plot using stat_summary
ggplot(sensitivity_res, aes(x = factor(Prior_Variance), y = Value)) +
  stat_summary(fun.data = ci_summary, geom = "pointrange", color = "blue") +
  facet_wrap(~ Parameter, scales = "free_y") +
  geom_hline(data = reference_lines, 
             aes(yintercept = Mean), 
             color = "red", linetype = "dashed") +
  labs(
    title = "Prior Sensitivity Analysis with 95% Credible Intervals",
    x = "Prior Variance",
    y = "Posterior Median and 95% CI"
  ) +
  theme_minimal()

2.4.2 Predictor Selection Sensitivity

# Prepare design matrix and response
X_matrix <- model.matrix(
  ~ Water_ChannelDepth + Water_Temp + Water_pH +  
             Water_Salinity + Water_TSS + Water_BOD + Water_Fecal,
  data = analysis_data
)
y_vector <- analysis_data$Presence

# Function to compute DIC
compute_dic <- function(model, X, y) {
  dev <- function(beta) {
    eta <- X %*% beta
    p <- plogis(eta)
    -2 * sum(dbinom(y, 1, p, log = TRUE))
  }
  dev_samples <- apply(model, 1, dev)
  D_bar <- mean(dev_samples)
  theta_hat <- colMeans(model)
  D_hat <- dev(theta_hat)
  pD <- D_bar - D_hat
  c(DIC = D_bar + pD, pD = pD, Dbar = D_bar)
}

# Fit reduced models
reduced_model <- MCMClogit(
  Presence ~ Water_TSS + Water_Fecal,
  data = analysis_data,
  b0 = 0, B0 = 0.1,
  mcmc = 50000, burnin = 5000
)

# Calculate DICs
X_reduced <- model.matrix(
  ~ Water_TSS + Water_Fecal,
  data = analysis_data
)

dic_full <- compute_dic(bmi_model, X_matrix, y_vector)
dic_reduced <- compute_dic(reduced_model, X_reduced, y_vector)

# Compare models
model_comparison <- data.frame(
  Model = c("Full", "Reduced"),
  DIC = c(dic_full["DIC"], dic_reduced["DIC"]),
  pD = c(dic_full["pD"], dic_reduced["pD"])
)

kable(model_comparison, digits = 1, 
      caption = "Model Comparison Using DIC") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Model Comparison Using DIC
Model	DIC	pD
Full	1513.4	8.1
Reduced	1513.9	3.0

2.5 Hypothesis Testing

Credible Intervals Approach

# Calculate 95% credible intervals for all parameters
credible_intervals <- data.frame(
  Parameter = colnames(bmi_model),
  Mean = colMeans(bmi_model),
  Lower = apply(bmi_model, 2, function(x) quantile(x, 0.025)),
  Upper = apply(bmi_model, 2, function(x) quantile(x, 0.975)))
  
# Identify significant parameters (where interval doesn't contain 0)
credible_intervals$Significant <- with(credible_intervals, 
                                      Lower > 0 | Upper < 0)

# Display results
kable(credible_intervals, digits = 3,
      caption = "95% Credible Intervals for Water Quality Parameters") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(5, background = ifelse(credible_intervals$Significant, 
                                   "#E6FFE6", "none"))

95% Credible Intervals for Water Quality Parameters
	Parameter	Mean	Lower	Upper	Significant
(Intercept)	(Intercept)	-1.101	-1.233	-0.977	TRUE
Water_ChannelDepth	Water_ChannelDepth	0.182	-0.032	0.405	FALSE
Water_Temp	Water_Temp	-0.174	-0.409	0.055	FALSE
Water_pH	Water_pH	0.034	-0.203	0.264	FALSE
Water_Salinity	Water_Salinity	-0.125	-0.425	0.175	FALSE
Water_TSS	Water_TSS	-0.455	-0.717	-0.199	TRUE
Water_BOD	Water_BOD	0.096	-0.065	0.262	FALSE
Water_Fecal	Water_Fecal	0.386	0.162	0.604	TRUE

Probability of Direction (PoD)

# Calculate probability of direction for each parameter
pod_results <- data.frame(
  Parameter = colnames(bmi_model),
  Prob_Positive = apply(bmi_model, 2, function(x) mean(x > 0)),
  Prob_Negative = apply(bmi_model, 2, function(x) mean(x < 0)),
  Effect_Direction = ifelse(
    apply(bmi_model, 2, function(x) mean(x > 0)) > 0.95, "Positive",
    ifelse(apply(bmi_model, 2, function(x) mean(x < 0)) > 0.95, 
           "Negative", "Uncertain"))
)

# Display results
kable(pod_results, digits = 3,
      caption = "Probability of Direction for Water Quality Effects") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(4, background = case_when(
    pod_results$Effect_Direction == "Positive" ~ "#E6F3FF",
    pod_results$Effect_Direction == "Negative" ~ "#FFE6E6",
    TRUE ~ "none"))

Probability of Direction for Water Quality Effects
	Parameter	Prob_Positive	Prob_Negative	Effect_Direction
(Intercept)	(Intercept)	0.000	1.000	Negative
Water_ChannelDepth	Water_ChannelDepth	0.953	0.047	Positive
Water_Temp	Water_Temp	0.069	0.931	Uncertain
Water_pH	Water_pH	0.610	0.390	Uncertain
Water_Salinity	Water_Salinity	0.208	0.792	Uncertain
Water_TSS	Water_TSS	0.000	1.000	Negative
Water_BOD	Water_BOD	0.880	0.120	Uncertain
Water_Fecal	Water_Fecal	1.000	0.000	Positive

Bayes Factor Analysis

# Function to calculate Savage-Dickey Bayes Factor approximation
calculate_bf <- function(samples, null_value = 0) {
  # Estimate density at null value
  dens_null <- density(samples)
  f_null <- approx(dens_null$x, dens_null$y, xout = null_value)$y
  
  # Prior density at null (using same prior as model: N(0, 10))
  prior_sd <- sqrt(10)
  f_prior <- dnorm(null_value, mean = 0, sd = prior_sd)
  
  # Bayes factor (H1/H0)
  bf <- f_prior / f_null
  return(bf)
}

# Calculate Bayes factors for all parameters
bf_results <- data.frame(
  Parameter = colnames(bmi_model),
  Bayes_Factor = apply(bmi_model, 2, calculate_bf))
  
# Interpret BF strength following Jeffreys' scale
bf_results$Evidence <- cut(bf_results$Bayes_Factor,
  breaks = c(0, 1/100, 1/30, 1/10, 1/3, 1, 3, 10, 30, 100, Inf),
  labels = c("Extreme for H0", "Very strong for H0", "Strong for H0",
             "Moderate for H0", "Weak for H0", "Weak for H1",
             "Moderate for H1", "Strong for H1", "Very strong for H1",
             "Extreme for H1"),
  include.lowest = TRUE)

# Display results
kable(bf_results, digits = 2,
      caption = "Bayes Factor Analysis of Water Quality Effects") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(3, background = case_when(
    bf_results$Bayes_Factor > 10 ~ "#E6FFE6",
    bf_results$Bayes_Factor < 1/10 ~ "#FFE6E6",
    TRUE ~ "none"))

Bayes Factor Analysis of Water Quality Effects
	Parameter	Bayes_Factor	Evidence
(Intercept)	(Intercept)	NA	NA
Water_ChannelDepth	Water_ChannelDepth	0.14	Moderate for H0
Water_Temp	Water_Temp	0.12	Moderate for H0
Water_pH	Water_pH	0.04	Strong for H0
Water_Salinity	Water_Salinity	0.08	Strong for H0
Water_TSS	Water_TSS	95.09	Very strong for H1
Water_BOD	Water_BOD	0.05	Strong for H0
Water_Fecal	Water_Fecal	34.35	Very strong for H1

Bayesian Binary Logistic Regression: Theory, Implementation, and Applications

May Ann Palen

July 15, 2025

1 Implementation Using Simulated Dataset

1.1 Diagnostic Procedures

1.1.1 Convergence Diagnostics

1.1.1.1 Trace Plots

1.1.1.2 Autocorrelation Function (ACF)

1.1.1.3 Geweke Diagnostics

1.1.1.4 Inefficiency Factors

1.1.2 Model Comparison: Deviance Information Criterion (DIC)

1.2 Sensitivity Analysis

2 Application: Bayesian Analysis of Benthic Macroinvertebrate (BMI) Presence

2.1 Data

2.2 Bayesian Analysis

2.3 Diagnostic Checks

2.3.1 Convergence Assessments

2.3.2 Quantitative Diagnostics

2.4 Sensitivity Analysis

2.4.1 Prior Variance Sensitivity

2.4.2 Predictor Selection Sensitivity

2.5 Hypothesis Testing