Load required
install.packages("ResourceSelection")
## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'ResourceSelection' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\H P\AppData\Local\Temp\RtmpaIHoTF\downloaded_packages
install.packages("caret")
## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'caret' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'caret'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\H P\AppData\Local\R\win-library\4.4\00LOCK\caret\libs\x64\caret.dll to
## C:\Users\H P\AppData\Local\R\win-library\4.4\caret\libs\x64\caret.dll:
## Permission denied
## Warning: restored 'caret'
##
## The downloaded binary packages are in
## C:\Users\H P\AppData\Local\Temp\RtmpaIHoTF\downloaded_packages
install.packages("tidyverse")
## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\H P\AppData\Local\Temp\RtmpaIHoTF\downloaded_packages
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.4.2
## ResourceSelection 0.3-6 2023-06-27
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: ggplot2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.4.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
set.seed(123)
n <- 1000
x1 <- rnorm(n)
x2 <- rnorm(n)
z <- 1 + 2*x1 + 3*x2
prob <- 1 / (1 + exp(-z))
outcome <- rbinom(n, 1, prob)
data <- data.frame(outcome = outcome, x1 = x1, x2 = x2) |>
mutate(outcome = if_else(outcome == 1, "yes", "no")) |>
mutate(outcome = factor(outcome))
# Perform binary logistic regression
model <- glm(outcome ~ x1 + x2, data = data, family = binomial)
# Summary of the model
summary(model)
##
## Call:
## glm(formula = outcome ~ x1 + x2, family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1877 0.1291 9.199 <2e-16 ***
## x1 2.1243 0.1768 12.018 <2e-16 ***
## x2 3.4635 0.2395 14.460 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1329.1 on 999 degrees of freedom
## Residual deviance: 515.4 on 997 degrees of freedom
## AIC: 521.4
##
## Number of Fisher Scoring iterations: 7
# Hosmer-Lemeshow goodness of fit test
hl_test <- hoslem.test(model$y,
fitted(model))
print(hl_test)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model$y, fitted(model)
## X-squared = 3.4942, df = 8, p-value = 0.8996
# Calculate predictions
predictions <- predict(model, type = "response")
predicted_classes <- ifelse(predictions > 0.5, "yes", "no")
# Create confusion matrix
conf_matrix <- confusionMatrix(factor(predicted_classes),
factor(data$outcome))
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 320 46
## yes 61 573
##
## Accuracy : 0.893
## 95% CI : (0.8722, 0.9115)
## No Information Rate : 0.619
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7714
##
## Mcnemar's Test P-Value : 0.1759
##
## Sensitivity : 0.8399
## Specificity : 0.9257
## Pos Pred Value : 0.8743
## Neg Pred Value : 0.9038
## Prevalence : 0.3810
## Detection Rate : 0.3200
## Detection Prevalence : 0.3660
## Balanced Accuracy : 0.8828
##
## 'Positive' Class : no
##
# Calculate additional metrics
accuracy <- conf_matrix$overall["Accuracy"]
sensitivity <- conf_matrix$byClass["Sensitivity"]
specificity <- conf_matrix$byClass["Specificity"]
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.893
cat("Sensitivity:", sensitivity, "\n")
## Sensitivity: 0.839895
cat("Specificity:", specificity, "\n")
## Specificity: 0.9256866
# Plot ROC curve
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.2
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
roc_curve <- roc(data$outcome, predictions)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
plot(roc_curve, main = "ROC Curve")

auc <- auc(roc_curve)
cat("AUC:", auc, "\n")
## AUC: 0.9563601