Read data file

library(tidyverse)  # read_csv() and %>%
library(gt)         # gt()
library(nnet)       # multinom()
binary <- read_csv("C:/Users/Lenovo/Downloads/binary.csv")
## Rows: 400 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (4): admit, gre, gpa, rank
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(binary) %>% gt()
admit   gre    gpa   rank
    0   380   3.61      3
    1   660   3.67      3
    1   800   4.00      1
    1   640   3.19      4
    0   520   2.93      4
    1   760   3.00      2
str(binary)
## spc_tbl_ [400 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ admit: num [1:400] 0 1 1 1 0 1 1 0 1 0 ...
##  $ gre  : num [1:400] 380 660 800 640 520 760 560 400 540 700 ...
##  $ gpa  : num [1:400] 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
##  $ rank : num [1:400] 3 3 1 4 4 2 1 2 3 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   admit = col_double(),
##   ..   gre = col_double(),
##   ..   gpa = col_double(),
##   ..   rank = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

We study whether a student is admitted (admit) as a function of three predictors: gre, gpa, and rank.
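
Logistic regression models the log-odds of admission as a linear combination of the predictors (note that rank is treated as numeric here, since it was read in as a double):

$$
\log\frac{P(\text{admit}=1)}{1-P(\text{admit}=1)} = \beta_0 + \beta_1\,\text{gre} + \beta_2\,\text{gpa} + \beta_3\,\text{rank}
$$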

Logistic Regression Model

mymodel <- multinom(admit~., data = binary)
## # weights:  5 (4 variable)
## initial  value 277.258872 
## final  value 229.720883 
## converged
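
Because admit has only two classes, multinom() here is effectively fitting an ordinary binary logistic regression. As a cross-check (a sketch, not part of the original analysis; glm_model is a new name), the same model can be fitted with glm() and should give essentially the same coefficients:

# Equivalent binary logistic regression; matches multinom() up to optimizer precision
glm_model <- glm(admit ~ ., data = binary, family = binomial)
summary(glm_model)$coefficients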

Misclassification Rate

p <- predict(mymodel, binary)            # predicted class at the default 0.5 cutoff
tab <- table(p, binary$admit)            # confusion matrix: rows = predicted, columns = actual
accuracy <- sum(diag(tab))/sum(tab)      # proportion correctly classified
print(accuracy)
## [1] 0.705

**Approx. 70% accuracy for this model**

misclassification_rate <- 1 - accuracy
print(misclassification_rate)
## [1] 0.295

**Approx. 30% misclassification rate**
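
Overall accuracy hides how the errors are split between the two classes. As a quick sketch using the confusion matrix tab built above (rows = predicted, columns = actual), the per-class error rates are:

# Error rate per actual class: 1 - correctly classified / actual count
1 - diag(tab) / colSums(tab)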

We want to know whether this accuracy is actually good. Test it against a benchmark: the accuracy of always predicting the majority class of admit.

table(binary$admit)
## 
##   0   1 
## 273 127

Doing the calculation manually, 273/400 = 0.68, so always predicting "not admitted" is already about 68% accurate; the logistic model's 70.5% is only slightly better.
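
The same benchmark can be computed directly (a small sketch; baseline is a new name introduced here):

# Accuracy of always predicting the majority class (0 = not admitted)
baseline <- max(table(binary$admit)) / nrow(binary)
print(baseline)
## [1] 0.6825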

Model Performance Evaluation

library(ROCR)
pred <- predict(mymodel, binary, type='prob')   # predicted probability of admit = 1
head(pred)
##          1          2          3          4          5          6 
## 0.18955119 0.31778152 0.71781816 0.14894795 0.09795241 0.37867798
hist(pred)

pred <- prediction(pred, binary$admit)   # ROCR prediction object from probabilities and labels
eval <- performance(pred, "acc")         # accuracy at every possible cutoff
plot(eval)
abline(h=0.71, v=0.45)                   # rough visual guide to the best accuracy and cutoff

Identifying best cutoff and accuracy values

best <- which.max(slot(eval, "y.values")[[1]])    # index of the highest accuracy
acc <- slot(eval, "y.values")[[1]][best]          # best accuracy
print(acc)
## [1] 0.7175
cutoff <- slot(eval, "x.values")[[1]][best]       # probability cutoff giving that accuracy
print(cutoff)
##       158 
## 0.4683497
print(c(Accuracy=acc, Cutoff=cutoff))
##   Accuracy Cutoff.158 
##  0.7175000  0.4683497
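
Re-classifying with this cutoff instead of the default 0.5 should reproduce the 0.7175 accuracy (a sketch; prob, p_best and tab_best are new names introduced here):

prob <- predict(mymodel, binary, type='prob')                # fitted probabilities of admit = 1
p_best <- ifelse(prob >= cutoff, 1, 0)                       # classify with the best cutoff
tab_best <- table(Predicted = p_best, Actual = binary$admit)
sum(diag(tab_best)) / sum(tab_best)                          # expected to be about 0.7175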

Receiver Operating Characteristic (ROC) Curve

roc <- performance(pred, "tpr","fpr")
plot(roc,
     colorize = TRUE,
     main = "ROC Curve",
     ylab = "Sensitivity",
     xlab = "1 - Specificity")
abline(a=0, b=1)

Area Under the Curve (AUC)

auc <- performance(pred, "auc")
auc <- unlist(slot(auc, "y.values"))
auc <- round(auc,4)
print(auc)
## [1] 0.6921
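
An AUC of about 0.69 indicates modest discriminative power. It can be read as the probability that a randomly chosen admitted student receives a higher predicted probability than a randomly chosen non-admitted one. A quick sanity check of that interpretation (a sketch; prob, adm and non are new names introduced here):

prob <- predict(mymodel, binary, type='prob')
adm <- prob[binary$admit == 1]
non <- prob[binary$admit == 0]
# Concordance: P(score of admitted > score of non-admitted), ties counted as 1/2
mean(outer(adm, non, ">")) + 0.5 * mean(outer(adm, non, "=="))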