1. Introduction

This short example demonstrates how to perform a logistic regression in R using the glm() function with a binary outcome.
We’ll predict whether a student is admitted (admit = 1) or not (admit = 0) based on their GRE score and GPA.


2. Load and Inspect the Data

# Build the 10-row toy admissions dataset:
#   admit - outcome (1 = admitted, 0 = not admitted)
#   gre   - GRE score
#   gpa   - grade point average
# The columns are assembled inside local() so that only `data` is created
# in the workspace.
data <- local({
  admit <- c(1, 0, 1, 0, 1, 0, 0, 1, 0, 1)
  gre <- c(800, 640, 700, 580, 720, 600, 500, 750, 620, 710)
  gpa <- c(4.0, 3.5, 3.8, 3.2, 3.9, 3.3, 2.9, 3.7, 3.4, 3.9)
  data.frame(admit = admit, gre = gre, gpa = gpa)
})

# Preview the first six rows
head(data, n = 6)
##   admit gre gpa
## 1     1 800 4.0
## 2     0 640 3.5
## 3     1 700 3.8
## 4     0 580 3.2
## 5     1 720 3.9
## 6     0 600 3.3

3. Fit a Logistic Regression Model

# Fit a logistic regression of `admit` on `gre` and `gpa`.
# glm() fits generalized linear models
# family = binomial specifies logistic regression

model <- glm(admit ~ gre + gpa, data = data, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE: this warning, together with the summary below (standard errors in the
# thousands to millions, z values of 0, p-values of 1, residual deviance of
# ~2e-10, and 25 Fisher scoring iterations, which is glm.control()'s default
# maximum), indicates complete separation: in this toy dataset the predictors
# split admitted from non-admitted students perfectly. The coefficient
# estimates and p-values are therefore not interpretable; the output only
# illustrates the workflow.
# Summarize model output
summary(model)
## 
## Call:
## glm(formula = admit ~ gre + gpa, family = binomial, data = data)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.593e+02  1.430e+06       0        1
## gre          2.486e-01  3.030e+03       0        1
## gpa          1.077e+02  7.607e+05       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.3863e+01  on 9  degrees of freedom
## Residual deviance: 2.4490e-10  on 7  degrees of freedom
## AIC: 6
## 
## Number of Fisher Scoring iterations: 25

4. Predict Admission Probabilities

# Attach the fitted admission probability for every row used to fit the
# model; type = "response" asks for probabilities rather than the linear
# predictor.
data[["predicted_prob"]] <- stats::predict(object = model, type = "response")

# Show each observation alongside its fitted probability
data
##    admit gre gpa predicted_prob
## 1      1 800 4.0   1.000000e+00
## 2      0 640 3.5   6.605993e-11
## 3      1 700 3.8   1.000000e+00
## 4      0 580 3.2   2.220446e-16
## 5      1 720 3.9   1.000000e+00
## 6      0 600 3.3   2.220446e-16
## 7      0 500 2.9   2.220446e-16
## 8      1 750 3.7   1.000000e+00
## 9      0 620 3.4   2.220446e-16
## 10     1 710 3.9   1.000000e+00

5. Classify Predictions

# Turn each fitted probability into a hard 0/1 label: probabilities at or
# above the 0.5 cut-off become 1 (admit), everything else becomes 0.
data$predicted_class <- with(data, ifelse(predicted_prob >= 0.5, 1, 0))

# Show the data with both the probability and the resulting class label
data
##    admit gre gpa predicted_prob predicted_class
## 1      1 800 4.0   1.000000e+00               1
## 2      0 640 3.5   6.605993e-11               0
## 3      1 700 3.8   1.000000e+00               1
## 4      0 580 3.2   2.220446e-16               0
## 5      1 720 3.9   1.000000e+00               1
## 6      0 600 3.3   2.220446e-16               0
## 7      0 500 2.9   2.220446e-16               0
## 8      1 750 3.7   1.000000e+00               1
## 9      0 620 3.4   2.220446e-16               0
## 10     1 710 3.9   1.000000e+00               1

6. Evaluate Model Performance

# Cross-tabulate predicted labels (rows) against observed outcomes (columns)
with(data, table(Predicted = predicted_class, Actual = admit))
##          Actual
## Predicted 0 1
##         0 5 0
##         1 0 5
# Accuracy = share of rows whose predicted label equals the observed outcome.
# It is computed on the same rows the model was fitted to.
accuracy <- with(data, mean(predicted_class == admit))
print(paste("Model Accuracy:", round(accuracy, digits = 2)))
## [1] "Model Accuracy: 1"

7. Visualize the Logistic Curve

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Points: each observation's predicted probability (from the gre + gpa model)
# plotted against its GRE score.
# Curve: a logistic fit of the observed 0/1 outcome `admit` on GRE alone.
# The smoother's y aesthetic is overridden with `admit` because a binomial
# GLM must be fitted to the binary outcome, not to another model's predicted
# probabilities. The curve is a one-predictor illustration and is not the
# two-predictor model fitted earlier.
ggplot(data, aes(x = gre, y = predicted_prob)) +
  geom_point(color = "darkred", size = 3) +
  geom_smooth(
    aes(y = admit),
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE,
    color = "blue"
  ) +
  labs(
    title = "Logistic Regression Curve",
    x = "GRE Score",
    y = "Predicted Probability of Admission"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred