# Load necessary libraries
library(rms)
library(ggplot2)
library(tidyverse)
# Set seed for reproducibility
set.seed(123)
# Generate sample data
n <- 1000 # number of observations
x <- runif(n, -3, 3) # predictor
z <- 1 + 2 * x - x^2 # linear predictor
pr <- 1 / (1 + exp(-z)) # transform to probability
Y <- rbinom(n, 1, pr) # binary outcome
df <- data.frame(Y = Y, x = x)
# Fit logistic regression model with restricted cubic splines
ddist <- datadist(df)
options(datadist="ddist")
fit <- glm(Y ~ rcs(x, 4), data = df, family = binomial)
# Predict values
pred <- predict(fit, se.fit = TRUE)
df <- df %>%
mutate(predicted = pred$fit,
CIlow = pred$fit + qnorm(0.025)*pred$se.fit,
CIhigh = pred$fit + qnorm(0.975)*pred$se.fit)
# Plot with confidence intervals
fig <- ggplot(df, aes(x = x)) +
geom_point(aes(y = predicted)) +
geom_ribbon(aes(ymin = CIlow, ymax = CIhigh), alpha = 0.2) +
labs(x = "x",
y = "Predicted Logit",
title = "Restricted Cubic Spline in Logistic Regression with Confidence Intervals")
fig
