# Simple R code: data, logistic model, table, and graphs

# Load packages (install if needed)
# install.packages(c("pROC","knitr"))
library(pROC)
library(knitr)

# 1. Example data
# Simulate 100 employee records. The seed fixes the random draws, and the
# columns are drawn in a fixed order (Age, Salary, YearsAtJob, Attrition) so
# the result is reproducible.
set.seed(1)
df <- local({
  n_obs <- 100L
  age <- sample(22:60, size = n_obs, replace = TRUE)
  salary <- sample(30000:90000, size = n_obs, replace = TRUE)
  years_at_job <- sample(1:20, size = n_obs, replace = TRUE)
  # Roughly 30% "Yes" / 70% "No".
  attrition <- sample(
    c("Yes", "No"),
    size = n_obs,
    replace = TRUE,
    prob = c(0.3, 0.7)
  )
  data.frame(
    Age = age,
    Salary = salary,
    YearsAtJob = years_at_job,
    # "No" is the reference level, so "Yes" is the modelled outcome.
    Attrition = factor(attrition, levels = c("No", "Yes"))
  )
})

# 2. Train/test split (70/30)
# Draw a reproducible random 70% of the row indices for training; every
# remaining row goes to the test set.
set.seed(123)
n_rows <- nrow(df)
# floor() makes the whole-row count explicit instead of handing sample() a
# possibly fractional size.
train_idx <- sample(seq_len(n_rows), size = floor(0.7 * n_rows))
train <- df[train_idx, ]
# Select the complement explicitly: df[-train_idx, ] would return zero rows
# (not all rows) if train_idx were ever empty.
test  <- df[setdiff(seq_len(n_rows), train_idx), ]

# 3. Fit logistic regression
# Binomial GLM of Attrition on Age, Salary and YearsAtJob, fitted on `train`.
model <- glm(
  formula = Attrition ~ Age + Salary + YearsAtJob,
  family = binomial,
  data = train
)

# 4. Predict probabilities and classes on test set
# type = "response" yields probabilities (here: probability of "Yes").
prob <- predict(object = model, newdata = test, type = "response")
# Threshold at 0.5: anything not strictly above it is labelled "No".
pred_class <- ifelse(prob <= 0.5, "No", "Yes")

# 5. Simple results table (first 8 rows)
# Place the test-set predictors next to the observed label, the predicted
# label and the predicted probability (rounded to 3 decimals).
res <- with(test, data.frame(
  Age = Age,
  Salary = Salary,
  YearsAtJob = YearsAtJob,
  Actual = Attrition,
  Predicted = pred_class,
  Prob_Yes = round(prob, digits = 3)
))
knitr::kable(
  x = head(res, n = 8),
  caption = "Test set predictions (first 8 rows)"
)
# Output pasted from a previous run (kept as comments so the script parses):
# Test set predictions (first 8 rows)
#      Age  Salary  YearsAtJob  Actual  Predicted  Prob_Yes
# 1     25   48664          11  No      No            0.272
# 2     60   59244           2  No      No            0.456
# 3     22   87712          20  Yes     No            0.225
# 10    42   69669          14  Yes     No            0.281
# 11    31   49249           5  Yes     No            0.364
# 19    58   50838           4  No      No            0.403
# 20    55   47531          16  Yes     No            0.225
# 24    41   46801          20  No      No            0.174
# 6. Plot: predicted probabilities for first 20 test cases
# par() returns the previous values of the settings it changes; keep them so
# the layout can be restored exactly instead of assuming it was c(1, 1).
old_par <- par(mfrow = c(1, 2))   # two plots side-by-side
# Number of bars actually drawn (fewer than 20 if the test set is small).
n_shown <- min(20, nrow(res))
barplot(head(res$Prob_Yes, n_shown),
        names.arg = seq_len(n_shown),   # seq_len() is safe when n_shown is 0
        ylim = c(0, 1),
        main = "Predicted Probabilities (first 20)",
        ylab = "P(Attrition = Yes)")
abline(h = 0.5, col = "red", lty = 2)   # classification threshold

# 7. ROC and AUC
# direction = "<" states that controls ("No") are expected to score lower than
# cases ("Yes"), which matches prob being P(Yes). The default ("auto") may flip
# the comparison based on the data and so report an optimistically biased AUC.
roc_obj <- roc(response = test$Attrition, predictor = prob,
               levels = c("No", "Yes"), direction = "<")
plot(roc_obj, main = paste0("ROC curve (AUC = ", round(auc(roc_obj), 3), ")"))

# Restore the graphics settings that were active before this section.
par(old_par)