# Load necessary libraries
library(rms)
library(ggplot2)
library(tidyverse)
# Set seed for reproducibility
set.seed(123)
# Generate sample data
n <- 1000  # number of observations
x <- runif(n, -3, 3)  # predictor
z <- 1 + 2 * x - x^2  # linear predictor
pr <- 1 / (1 + exp(-z))  # transform to probability
Y <- rbinom(n, 1, pr)  # binary outcome

df <- data.frame(Y = Y, x = x)
# Fit logistic regression model with restricted cubic splines
ddist <- datadist(df)
options(datadist="ddist")
fit <- glm(Y ~ rcs(x, 4), data = df, family = binomial)
# Predict values
pred <- predict(fit, se.fit = TRUE)

df <- df %>% 
  mutate(predicted = pred$fit, 
         CIlow = pred$fit + qnorm(0.025)*pred$se.fit, 
         CIhigh = pred$fit + qnorm(0.975)*pred$se.fit)
# Plot with confidence intervals
fig <- ggplot(df, aes(x = x)) +
  geom_point(aes(y = predicted)) +
  geom_ribbon(aes(ymin = CIlow, ymax = CIhigh), alpha = 0.2) +
  labs(x = "x", 
       y = "Predicted Logit", 
       title = "Restricted Cubic Spline in Logistic Regression with Confidence Intervals")

fig