Code
# ================================
# Binary Logistic Regression (Male vs Female)
# ================================
# Load libraries
library(ggplot2)
library(dplyr)
library(GGally)

# -------------------------------
# Read the measurements from disk
# -------------------------------
# "SM_data.csv" is resolved relative to the current working directory.
data <- utils::read.csv(file = "SM_data.csv")

# Compact overview of the columns and types that were read
utils::str(object = data)
'data.frame':   74 obs. of  5 variables:
 $ H : num  162 176 180 176 181 ...
 $ PL: num  10.4 20 20 12 20 ...
 $ AL: num  16.4 26 30 28 25 ...
 $ FL: num  21.6 27 22 15 28 8 24 15 25 21.4 ...
 $ MF: chr  "F" "M" "M" "M" ...
Code
# -------------------------------
# Convert response to factor
# -------------------------------
# Expects column "MF" holding "M" / "F". It is recoded to a factor with
# levels "0" (F, the baseline) and "1" (M), so a binomial glm() on MF
# models the probability of "1".
#
# The recode only runs when MF is not already the 0/1 factor: re-running this
# cell on converted data would otherwise compare "0"/"1" against "M" and
# silently turn every row into "0".
if (!identical(levels(data$MF), c("0", "1"))) {
  # Any label other than "M"/"F" (e.g. "m" or " M") would be silently counted
  # as "0" by the comparison below, so report it instead of miscoding it.
  unexpected <- setdiff(unique(as.character(data$MF)), c("M", "F", NA))
  if (length(unexpected) > 0) {
    stop(
      "Unexpected values in column MF: ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }

  # Explicit levels keep both classes in the factor even if one is absent.
  # Missing values stay NA.
  data$MF <- factor(as.integer(data$MF == "M"), levels = c(0L, 1L))
}

# Optional: the baseline is already "0" (F). To make "1" the baseline instead:
# data$MF <- relevel(data$MF, ref = "1")

# -------------------------------
# Pairwise cross plot of every column, coloured by MF
# -------------------------------
# Upper triangle: raw scatter points. Lower triangle: loess smoother without
# a confidence band. Diagonal: density curves.
p_cross <- ggpairs(
  data = data,
  mapping = aes(colour = MF, alpha = 0.7),
  diag = list(continuous = "densityDiag"),
  lower = list(continuous = wrap("smooth", method = "loess", se = FALSE)),
  upper = list(continuous = wrap("points", size = 1.2))
)
p_cross <- p_cross + theme_minimal()

print(p_cross)

Code
# -------------------------------
# Fit Logistic Regression Model
# -------------------------------
# Use all measurement columns as predictors. The formula is built from the
# column names instead of `MF ~ .` so that the derived columns added later in
# this script (Prob_Male, Predicted) are never picked up as predictors when
# this cell is re-run after the prediction cell.
predictors <- setdiff(names(data), c("MF", "Prob_Male", "Predicted"))
model_formula <- reformulate(predictors, response = "MF")

model <- glm(model_formula, data = data, family = binomial)

summary(model)

Call:
glm(formula = MF ~ ., family = binomial, data = data)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -45.10599   11.23952  -4.013 5.99e-05 ***
H             0.27793    0.06961   3.993 6.53e-05 ***
PL            0.03937    0.26488   0.149    0.882    
AL            0.02530    0.02524   1.002    0.316    
FL           -0.06259    0.20293  -0.308    0.758    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 95.945  on 73  degrees of freedom
Residual deviance: 45.097  on 69  degrees of freedom
AIC: 55.097

Number of Fisher Scoring iterations: 6
Code
# -------------------------------
# Predictions
# -------------------------------
# Fitted probability of MF == "1" for every row used to fit the model
prob <- predict(model, type = "response")
data$Prob_Male <- prob

# Convert to class (threshold = 0.5).
# The levels are fixed to c("0", "1") so both classes exist even when every
# prediction falls on one side of the threshold. With as.factor() the missing
# level would be dropped and the `==` comparison against MF below would fail
# because the two factors would have different level sets.
data$Predicted <- factor(
  ifelse(prob > 0.5, "1", "0"),
  levels = c("0", "1")
)

# -------------------------------
# Accuracy
# -------------------------------
# Share of rows whose predicted class equals the observed class
accuracy <- mean(data$Predicted == data$MF)
print(paste("Accuracy:", round(accuracy, 4)))
[1] "Accuracy: 0.9189"
Code
# Confusion matrix: predicted class in rows, observed class in columns
print(with(data, table(Predicted = Predicted, Actual = MF)))
         Actual
Predicted  0  1
        0 23  3
        1  3 45
Code
# -------------------------------
# Visualization 1: Scatter (2 variables)
# -------------------------------
# Change variables depending on your dataset.
# PL and AL are columns 2 and 3 of the data. They are mapped by name rather
# than as data[, 2] / data[, 3] so ggplot looks them up in the layer data and
# labels the axes with the column names.
p1 <- ggplot(data, aes(x = PL, y = AL, color = MF)) +
  geom_point(size = 2) +
  labs(title = "True Classes") +
  theme_minimal()

print(p1)

Code
# -------------------------------
# Visualization 2: Predicted Classes
# -------------------------------
# Same axes as the true-class scatter (PL vs AL), coloured by the class
# predicted at the 0.5 threshold. Columns are mapped by name rather than as
# data[, 2] / data[, 3] so the axes are labelled with the column names.
p2 <- ggplot(data, aes(x = PL, y = AL, color = Predicted)) +
  geom_point(size = 2) +
  labs(title = "Predicted Classes (Logistic Regression)") +
  theme_minimal()

print(p2)

Code
# -------------------------------
# Visualization 3: Decision Boundary
# -------------------------------
# The boundary is drawn in the plane of two predictors (columns 2 and 3 of
# the data; modify if needed) while every other predictor is held at its mean.
x_var <- names(data)[2]
y_var <- names(data)[3]

# 200 x 200 grid spanning the observed range of both variables.
# na.rm = TRUE matches the NA handling of the means computed below.
x_range <- range(data[[x_var]], na.rm = TRUE)
y_range <- range(data[[y_var]], na.rm = TRUE)

grid <- expand.grid(
  x = seq(x_range[1], x_range[2], length.out = 200),
  y = seq(y_range[1], y_range[2], length.out = 200)
)

colnames(grid) <- c(x_var, y_var)

# Fix other variables at mean (response and derived columns are excluded)
other_vars <- setdiff(
  names(data),
  c("MF", x_var, y_var, "Predicted", "Prob_Male")
)

for (v in other_vars) {
  grid[[v]] <- mean(data[[v]], na.rm = TRUE)
}

# Predict on grid and label each cell with the class chosen at threshold 0.5
grid$Prob <- predict(model, newdata = grid, type = "response")
grid$Class <- ifelse(grid$Prob > 0.5, "M", "F")

# Plot decision boundary.
# aes_string() is deprecated, so the columns named by x_var / y_var are looked
# up through the .data pronoun. Axis labels are set explicitly so they show
# the column names.
p3 <- ggplot() +
  geom_tile(
    data = grid,
    aes(x = .data[[x_var]], y = .data[[y_var]], fill = Class),
    alpha = 0.3
  ) +
  geom_point(
    data = data,
    aes(x = .data[[x_var]], y = .data[[y_var]], color = MF),
    size = 1.5
  ) +
  labs(title = "Decision Boundary (Binary Logistic)", x = x_var, y = y_var) +
  theme_minimal()

print(p3)